In [1]:
# import tensorflow as tf
# physical_devices = tf.config.list_physical_devices('GPU') 
# tf.config.experimental.set_memory_growth(physical_devices[0], True)

# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, LeakyReLU, BatchNormalization, GaussianNoise
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
# from keras.optimizers import Adam
from keras.wrappers.scikit_learn import KerasClassifier
from keras.constraints import maxnorm
from keras.regularizers import l1
from keras import initializers
# Random weights kernel_initializer=initializers.RandomNormal(stddev=0.01), 

# Preprocessing
from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler, Normalizer
from sklearn.impute import SimpleImputer

# Evaluating
from sklearn.model_selection import GridSearchCV, train_test_split
import matplotlib.pyplot as plt

# Saving model
from keras.models import load_model

imputer = SimpleImputer()
MMS = MinMaxScaler()
RS = RobustScaler()
SS = StandardScaler()
Norm = Normalizer()

%matplotlib inline


In [2]:
# Import stats
stats = pd.read_csv('./Web Scraping/Power_rankings.csv')
stats

# For each year
years = [i for i in range(2007, 2020)]

# Get data and merge
for year in years:
    
    # Get year columns
    temp_df = stats[['Team', f'{year} WPower' , f'{year} LPower', f'{year} GFPower', f'{year} GAPower', f'{year} MPower']]
    
    # Rename columns
    temp_df.columns = ['Team', 'WPower', 'LPower', 'GFPower', 'GAPower', 'MPower']
    
    # Create temp vis and home df with only team and those year columns
    vis_stats, home_stats = temp_df.add_prefix('V '), temp_df.add_prefix('H ')

    # Rename columns
    home_stats.rename(columns={'H Team':'Home'}, inplace=True)
    vis_stats.rename(columns={'V Team':'Visitor'}, inplace=True)

    # Get games
    games = pd.read_csv(f'./NBA_SB/Scraping/Games/NBA {year+1} Games.csv', parse_dates=['Date'])

    # Merge on teams
    merged = games.merge(home_stats, on='Home')
    merged = merged.merge(vis_stats, on='Visitor')
    merged

    # Merge with all games
    if year == years[0]:
        all_games = merged.copy()
    else:
        all_games = pd.concat([all_games, merged])
    
    # Sort values by date
    all_games.sort_values(by='Date', inplace=True)
    all_games.reset_index(inplace=True)
    all_games.drop('index', axis=1, inplace=True)
    
    # Replace odds == 0 with 1    
    all_games['Home Odds'] = all_games['Home Odds'].replace(0.00, 1.00)
    all_games['Vis Odds'] = all_games['Vis Odds'].replace(0.00, 1.00)

all_games.to_csv('Power Rankings and Games.csv', index=False)

In [3]:
# Define x and y
x = all_games.loc[:, 'H WPower':]
x['Home Odds'], x['Vis Odds'] = all_games['Home Odds'], all_games['Vis Odds']
y = all_games['Home Win']

In [4]:
# Create the model
model = Sequential()
model.add(Dense(x.shape[1], input_dim=x.shape[1], activation='relu'))
model.add(Dense(236, activation='relu'))
model.add(Dense(118, activation='relu'))
model.add(Dense(59, activation='relu'))
model.add(Dense(30, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# sgd = SGD(lr=0.0001, momentum=0)

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# LEARNING RATE SCHEDULE
rlrp = ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, patience=5, min_delta=1E-7, verbose=1)
# Early Stopping
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
model.fit(x, y, epochs=40, batch_size=10, validation_split=0.2, verbose=1, callbacks=[rlrp, es])

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40

Epoch 00012: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40

Epoch 00019: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 20/40
Epoch 21/40
Epoch 00021: early stopping


<keras.callbacks.History at 0x7fe791761c10>

In [19]:
def make_preds(model, df):
    # Check predictions
    preds = model.predict(df)

    # Create results df
    results = all_games[['Date', 'Home Odds', 'Vis Odds', 'Home Win']]
    
    # If len(df) != len(all_games):
        # Get index of df
        # only keep results of that index

    results['Preds'] = preds
    return results

good_results = []
# Check results of betting
def calc_result(df, col, threshold, good_results):
    
    new_df = df.copy()
    bet_size = 1
    
    # Create home bet results
    conditions = [
         ((df[col] > threshold) & (df['Home Win'] == True)),
        ((df[col] > threshold) & (df['Home Win'] == False)),
        (True)
    ]
    values = [(df['Home Odds'] - 1) * bet_size, -bet_size, 0]
    # Relative bet size
    values = [(df['Home Odds'] - 1) * (df['Preds'] * 10), -(df['Preds'] * 10), 0]
    
    # Create Home Columns in Df
    new_df['Home Outcome'] = np.select(conditions, values)
    new_df['Home Bankroll'] = new_df['Home Outcome'].cumsum()
    
    # Create vis bet results
    conditions2 = [
        ((df[col] < threshold) & (df['Home Win'] == True)),
        ((df[col] < threshold) & (df['Home Win'] == False)),
        (True)
    ]
    values2 = [-bet_size, (df['Vis Odds'] - 1) * bet_size, 0]
    # Relative bet size
    values2 = [ -(1 - df['Preds']) * 10, (df['Vis Odds'] - 1) * ((1 - df['Preds']) * 10), 0] 
    
    # Create Vis Columns in Df
    new_df['Vis Outcome'] = np.select(conditions2, values2)
    new_df['Vis Bankroll'] = new_df['Vis Outcome'].cumsum()
    
    # Count how many bets made
    home_bets_made = (new_df['Home Outcome'] != 0).sum()
    vis_bets_made = (new_df['Vis Outcome'] != 0).sum()
    #### PLOT THE BANKROLL AS WELL
    
    # Save result if high enough 
    home_result = round(new_df['Home Bankroll'].iloc[-1],2)
    vis_result = round(new_df['Vis Bankroll'].iloc[-1],2)
    
    if home_result > (10 * bet_size) or vis_result > (10 * bet_size):
        result_dict = {
            'Threshold': threshold,
            'Home Res': home_result,
            'Home Bets Made': home_bets_made,
            'Percentage Bet on Home': round(home_bets_made/len(results) * 100),
            'Vis Res': vis_result,
            'Vis Bets Made': vis_bets_made,
            'Percentage Bet on Visitor': round(vis_bets_made/len(results) * 100)
        }
        good_results.append(result_dict)
        print(result_dict)
    
    return new_df, home_result, vis_result


def print_results(df, col):
    good_results = []
    for x in range(30, 90):
        threshold = x / 100
        1
        # Plot vis and home results
        if home_result > 1 or vis_result > 1:
            plt.plot(plot_df['Home Bankroll'], label='Home')
            plt.plot(plot_df['Vis Bankroll'], label='Vis')
            plt.legend()
            plt.title(('Threshold: ',threshold))
            plt.show()

    print('Good results')
    print('Amount of games: ', len(results))
    for i in good_results:
        print(i)
    return plot_df

In [20]:
good_results = []
results = make_preds(model, x)
plot_df = print_results(results, 'Preds')

Good results
Amount of games:  20306


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results['Preds'] = preds


In [13]:
# Print out df with threshold 0.89
plot_df, home_result, vis_result = calc_result(results, 'Preds', 0.89, good_results)

In [14]:
plot_df.to_csv('test.csv', index=False)

In [81]:
# Functions to test thresholds
def create_threshold_df(df, col):
    # Create dictionary of accuracy of predictions for each prediction threshold

    accuracy_dict = {'Threshold': [],'Bet on Win':[], 'Accurate Wins': [], 'Bet on Loss':[], 'Accurate Losses': []}

    for x in range(30, 90):
        threshold = x / 100
        # Count how many times Ensemble > 1.06 (Predicted Win) and Home win == True (Actual Win)

        bet_wins = len(df[df[col] > threshold])
        bet_losses = len(df[df[col] < threshold])

        wins = df.apply(lambda x: True if x[col] > threshold and x['Home Win'] == True else False, axis=1)
        losses = df.apply(lambda x: True if x[col] < threshold and x['Home Win'] == False else False, axis=1)
        win_perc = (len(wins[wins == True]) / len(df[df[col] > threshold]) * 100)
        loss_perc = (len(losses[losses == True]) / len(df[df[col] < threshold]) * 100)

        accuracy_dict['Threshold'].append(threshold)
        accuracy_dict['Bet on Win'].append(bet_wins)
        accuracy_dict['Bet on Loss'].append(bet_losses)
        accuracy_dict['Accurate Wins'].append(win_perc)
        accuracy_dict['Accurate Losses'].append(loss_perc)

    accuracy_df = pd.DataFrame.from_dict(accuracy_dict)
    return accuracy_df

df = create_threshold_df(results, 'Preds')

ZeroDivisionError: division by zero

In [100]:
new_df, home_result, vis_result = calc_result(results, 'Preds', 0.8, good_results)
print(home_result)
print(vis_result)

-101.14
-559.53
