In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn import preprocessing
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
import os
import pickle
from sklearn.metrics import confusion_matrix
import requests, bs4
from sklearn.metrics import roc_curve, auc

In [2]:
db19 = pd.read_csv('2019stats.csv')

### Check results using 2016-2018 data on 2019 games 

In [3]:
db = pd.read_csv('2017_2018stats.csv')

In [4]:
feature3Cols = ['Location','Rolling3EFG','Rolling3TOV','Rolling3ORB','Rolling3FTR','Rolling3Pace','Opp R3Pace','Opp R3EFG','Opp R3TOV','Opp R3ORB','Opp R3FTR', 'Rolling3DEff', 'Opp R3 Deff', 'Rolling3OEff','Opp R3 Oeff']
feature9Cols = ['Location','Rolling9EFG','Rolling9TOV','Rolling9ORB','Rolling9FTR','Rolling9Pace','Opp R9Pace','Opp R9EFG','Opp R9TOV','Opp R9ORB','Opp R9FTR', 'Rolling9DEff', 'Opp R9 Deff', 'Rolling3OEff','Opp R3 Oeff']

target = ['Result']
X3 = db.dropna()[feature3Cols]
X9 = db.dropna()[feature9Cols]
y = np.array(db.dropna()[target]).flatten()
svc=SVC(probability=True, gamma = 'scale')

x9_train, x9_test, y9_train, y9_test = train_test_split(X9, y, test_size=0.3, random_state=2)
x3_train, x3_test, y3_train, y3_test = train_test_split(X3, y, test_size=0.3, random_state=2)


### Training models

In [5]:
ada9 = AdaBoostClassifier(base_estimator = svc, n_estimators=50, learning_rate=0.1, random_state=1).fit(x9_train, y9_train)
pred = ada9.predict(x9_test)
print('All picks (R9):',ada9.score(x9_test, y9_test))
print('All picks (R9) matrix:\n',confusion_matrix(y9_test, pred))
print(' ')
false_positive_rate, true_positive_rate, thresholds = roc_curve(y9_test, pred)
roc_auc = auc(false_positive_rate, true_positive_rate)
print(roc_auc)

ada3 = AdaBoostClassifier(base_estimator = svc, n_estimators=50, learning_rate=0.1, random_state=1).fit(x3_train, y3_train)
pred = ada3.predict(x3_test)
print('All picks (R3):',ada3.score(x3_test, y3_test))
print('All picks (R3) matrix:\n',confusion_matrix(y3_test, pred))
false_positive_rate, true_positive_rate, thresholds = roc_curve(y3_test, pred)
roc_auc = auc(false_positive_rate, true_positive_rate)
print(roc_auc)
print(' ')


All picks (R9): 0.5951293759512938
All picks (R9) matrix:
 [[249 400]
 [132 533]]
 
0.592585469837923
All picks (R3): 0.5745814307458144
All picks (R3) matrix:
 [[196 453]
 [106 559]]
0.5713022927117486
 


### 2019 test data

In [6]:
dogs2019 = db19[(db19['Line'] > 0)].dropna().reset_index()
Ydogs2019 = db19[(db19['Line'] > 0)].dropna()['Result']

In [10]:
print("Using dog data[rolling 9 averages]: \n")

print("Using 9 game standard predictor: ")
pred1 = ada9.predict(dogs2019[feature9Cols])
print('Dog picks:',ada9.score(dogs2019[feature9Cols], Ydogs2019))
print('Dog picks matrix:\n',confusion_matrix(Ydogs2019, pred1))
print('')

print("Using 3 game standard predictor: ")
pred2 = ada3.predict(dogs2019[feature9Cols])
print('Dog picks:',ada3.score(dogs2019[feature9Cols], Ydogs2019))
print('Dog picks matrix:\n',confusion_matrix(Ydogs2019, pred2))
print('')


Using dog data[rolling 9 averages]: 

Using 9 game standard predictor: 
Dog picks: 0.4575955265610438
Dog picks matrix:
 [[224 492]
 [ 90 267]]

Using 3 game standard predictor: 
Dog picks: 0.45293569431500463
Dog picks matrix:
 [[220 496]
 [ 91 266]]



In [11]:
odds = []
for i, n in enumerate(pred1):
    if n == 1:
        odds.append(dogs2019['Odds'][i])
import statistics
statistics.mean(odds)

251

204-180 with +198 odds picking dogs wow

In [12]:
print("Using rolling 3 averages: \n")

print("Using 9 game standard predictor: ")
pred1 = ada9.predict(dogs2019[feature3Cols])
print('Dog picks:',ada9.score(dogs2019[feature3Cols], Ydogs2019))
print('Dog picks matrix:\n',confusion_matrix(Ydogs2019, pred1))
print('')

print("Using 3 game standard predictor: ")
pred2 = ada3.predict(dogs2019[feature3Cols])
print('Dog picks:',ada3.score(dogs2019[feature3Cols], Ydogs2019))
print('Dog picks matrix:\n',confusion_matrix(Ydogs2019, pred2))
print('')


Using rolling 3 averages: 

Using 9 game standard predictor: 
Dog picks: 0.4296365330848089
Dog picks matrix:
 [[159 557]
 [ 55 302]]

Using 3 game standard predictor: 
Dog picks: 0.4193849021435228
Dog picks matrix:
 [[165 551]
 [ 72 285]]



In [13]:
odds = []
for i, n in enumerate(pred1):
    if n == 1:
        odds.append(dogs2019['Odds'][i])
        
print(statistics.mean(odds))
print(statistics.median(odds))

odds = []
for i, n in enumerate(pred2):
    if n == 1:
        odds.append(dogs2019['Odds'][i])
        
print(statistics.mean(odds))
print(statistics.median(odds))

256
185
257
185.0


#### Predictor 2 above goes 246-233 with an average odds of 210!

##### Now what I wanna do is build a model off 2018 data while continuously rolling in 2019 data as if the season was ongoing

In [14]:
#db = pd.read_csv('2018stats.csv')

In [17]:
#feature3Cols = ['Location','Rolling3EFG','Rolling3TOV','Rolling3ORB','Rolling3FTR','Rolling3Pace','Opp R3Pace','Opp R3EFG','Opp R3TOV','Opp R3ORB','Opp R3FTR']
#feature9Cols = ['Location','Rolling9EFG','Rolling9TOV','Rolling9ORB','Rolling9FTR','Rolling9Pace','Opp R9Pace','Opp R9EFG','Opp R9TOV','Opp R9ORB','Opp R9FTR']

#target = ['Result']
X3 = db.dropna()[feature3Cols]
X9 = db.dropna()[feature9Cols]
y = np.array(db.dropna()[target]).flatten()
svc=SVC(probability=True, gamma = 'scale')

x9_train, x9_test, y9_train, y9_test = train_test_split(X9, y, test_size=0.3, random_state=8)
x3_train, x3_test, y3_train, y3_test = train_test_split(X3, y, test_size=0.3, random_state=8)


In [18]:
ada9 = AdaBoostClassifier(base_estimator = svc, n_estimators=50, learning_rate=0.1, random_state=1).fit(x9_train, y9_train)
pred = ada9.predict(x9_test)
print('All picks (R9):',ada9.score(x9_test, y9_test))
print('All picks (R9) matrix:\n',confusion_matrix(y9_test, pred))
print(' ')
false_positive_rate, true_positive_rate, thresholds = roc_curve(y9_test, pred)
roc_auc = auc(false_positive_rate, true_positive_rate)
print(roc_auc)

ada3 = AdaBoostClassifier(base_estimator = svc, n_estimators=50, learning_rate=0.1, random_state=1).fit(x3_train, y3_train)
pred = ada3.predict(x3_test)
print('All picks (R3):',ada3.score(x3_test, y3_test))
print('All picks (R3) matrix:\n',confusion_matrix(y3_test, pred))
false_positive_rate, true_positive_rate, thresholds = roc_curve(y3_test, pred)
roc_auc = auc(false_positive_rate, true_positive_rate)
print(roc_auc)
print(' ')

All picks (R9): 0.634703196347032
All picks (R9) matrix:
 [[415 252]
 [228 419]]
 
0.634896616606689
All picks (R3): 0.6286149162861492
All picks (R3) matrix:
 [[442 225]
 [263 384]]
0.6280885832199818
 


In [19]:
dogs2019 = db19[db19['Line'] > 0].dropna().reset_index()
Ydogs2019 = db19[db19['Line'] > 0].dropna()['Result']

In [20]:
print("Using dog data[rolling 9 averages]: \n")

print("Using 9 game standard predictor: ")
pred1 = ada9.predict(dogs2019[feature9Cols])
print('Dog picks:',ada9.score(dogs2019[feature9Cols], Ydogs2019))
print('Dog picks matrix:\n',confusion_matrix(Ydogs2019, pred1))
print('')

print("Using 3 game standard predictor: ")
pred2 = ada3.predict(dogs2019[feature9Cols])
print('Dog picks:',ada3.score(dogs2019[feature9Cols], Ydogs2019))
print('Dog picks matrix:\n',confusion_matrix(Ydogs2019, pred2))
print('')

Using dog data[rolling 9 averages]: 

Using 9 game standard predictor: 
Dog picks: 0.625349487418453
Dog picks matrix:
 [[545 171]
 [231 126]]

Using 3 game standard predictor: 
Dog picks: 0.6178937558247903
Dog picks matrix:
 [[534 182]
 [228 129]]



Second model is the best one

In [21]:
odds = []
for i, n in enumerate(pred1):
    if n == 1:
        odds.append(dogs2019['Odds'][i])
        
print("Average odds 1:", statistics.mean(odds))
print("Median odds 1:", statistics.median(odds))

odds = []
for i, n in enumerate(pred2):
    if n == 1:
        odds.append(dogs2019['Odds'][i])
        
print("Average odds 2:", statistics.mean(odds))
print("Median odds 2:", statistics.median(odds))


Average odds 1: 168
Median odds 1: 150
Average odds 2: 191
Median odds 2: 155


In [22]:
print("Using rolling 3 averages: \n")

print("Using 9 game standard predictor: ")
pred1 = ada9.predict(dogs2019[feature3Cols])
print('Dog picks:',ada9.score(dogs2019[feature3Cols], Ydogs2019))
print('Dog picks matrix:\n',confusion_matrix(Ydogs2019, pred1))
print('')

print("Using 3 game standard predictor: ")
pred2 = ada3.predict(dogs2019[feature3Cols])
print('Dog picks:',ada3.score(dogs2019[feature3Cols], Ydogs2019))
print('Dog picks matrix:\n',confusion_matrix(Ydogs2019, pred2))
print('')


Using rolling 3 averages: 

Using 9 game standard predictor: 
Dog picks: 0.6393289841565704
Dog picks matrix:
 [[541 175]
 [212 145]]

Using 3 game standard predictor: 
Dog picks: 0.6393289841565704
Dog picks matrix:
 [[543 173]
 [214 143]]



In [23]:
odds = []
for i, n in enumerate(pred1):
    if n == 1:
        odds.append(dogs2019['Odds'][i])
        
print("Average odds 1:", statistics.mean(odds))
print("Median odds 1:", statistics.median(odds))

odds = []
for i, n in enumerate(pred2):
    if n == 1:
        odds.append(dogs2019['Odds'][i])
        
print("Average odds 2:", statistics.mean(odds))
print("Median odds 2:", statistics.median(odds))

Average odds 1: 185
Median odds 1: 160.0
Average odds 2: 192
Median odds 2: 160.0


Above: model 1 goes 163-191 with average betting odds of 182; this is profitable


In [63]:
db19 = pd.read_csv('2019stats.csv')
#db = pd.read_csv('2018stats.csv')

In [24]:
#db19.drop(['Unnamed: 0', 'Unnamed: 0.1'], axis = 1, inplace = True)
db19['Date'] = db19['Date'].apply(str)
db19['Date'] = pd.to_datetime(db19.Date, format = "%Y%m%d")
db19 = db19.sort_values(by = ['Date']).reset_index(drop = True)

In [25]:
db['Date'] = db['Date'].apply(str)
db['Date'] = pd.to_datetime(db.Date, format= "%Y%m%d")
db = db.sort_values(by = ['Date']).reset_index(drop = True)
test = pd.concat([db, db19[:int(len(db19)/2)]], ignore_index = True)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  after removing the cwd from sys.path.


In [26]:
# Leave in first half of prev season or cut it out?

test = test.drop(test.index[:int(len(db19)/2)]) 

In [27]:
db = test.reset_index(drop = True) ### for simplicity

In [30]:
db = db.drop(['Implied Prob','Implied Proba'], axis = 1)

### Here is a realistic expectation of what a running database may look like. I added half the season's worth of 2019 onto the 2018 db, and dropped the first half of 2018. The model now looks at the second half of 2018 and the first half of 2019.

In [31]:
#feature3Cols = ['Location','Rolling3EFG','Rolling3TOV','Rolling3ORB','Rolling3FTR','Rolling3Pace','Opp R3Pace','Opp R3EFG','Opp R3TOV','Opp R3ORB','Opp R3FTR']
#feature9Cols = ['Location','Rolling9EFG','Rolling9TOV','Rolling9ORB','Rolling9FTR','Rolling9Pace','Opp R9Pace','Opp R9EFG','Opp R9TOV','Opp R9ORB','Opp R9FTR']

target = ['Result']
X3 = db.dropna()[feature3Cols]
X9 = db.dropna()[feature9Cols]
y = np.array(db.dropna()[target]).flatten()
svc = SVC(probability=True, gamma = 'scale')

x9_train, x9_test, y9_train, y9_test = train_test_split(X9, y, test_size=0.3, random_state=3)
x3_train, x3_test, y3_train, y3_test = train_test_split(X3, y, test_size=0.3, random_state=3)


In [32]:
ada9 = AdaBoostClassifier(base_estimator = svc, n_estimators=50, learning_rate=0.1, random_state=3).fit(x9_train, y9_train)
pred = ada9.predict(x9_test)
print('All picks (R9):',ada9.score(x9_test, y9_test))
print('All picks (R9) matrix:\n',confusion_matrix(y9_test, pred))
false_positive_rate, true_positive_rate, thresholds = roc_curve(y9_test, pred)
roc_auc = auc(false_positive_rate, true_positive_rate)
print(roc_auc)
print(' ')

ada3 = AdaBoostClassifier(base_estimator = svc, n_estimators=50, learning_rate=0.1, random_state=3).fit(x3_train, y3_train)
pred = ada3.predict(x3_test)
print('All picks (R3):',ada3.score(x3_test, y3_test))
print('All picks (R3) matrix:\n',confusion_matrix(y3_test, pred))
false_positive_rate, true_positive_rate, thresholds = roc_curve(y3_test, pred)
roc_auc = auc(false_positive_rate, true_positive_rate)
print(roc_auc)
print(' ')


All picks (R9): 0.6286149162861492
All picks (R9) matrix:
 [[376 276]
 [212 450]]
0.6282227123607584
 
All picks (R3): 0.6354642313546424
All picks (R3) matrix:
 [[417 235]
 [244 418]]
0.6354952458621391
 


In [33]:
dogs2019 = db19[int(len(db19)/2):][db19['Line'] > 0].dropna().reset_index()
Ydogs2019 = db19[int(len(db19)/2):][db19['Line'] > 0].dropna()['Result']

  """Entry point for launching an IPython kernel.
  


### This is now predicting all underdog games for last half of season

In [34]:
print("Using dog data[rolling 9 averages]: \n")

print("Using 9 game standard predictor: ")
pred1 = ada9.predict(dogs2019[feature9Cols])
print('Dog picks:',ada9.score(dogs2019[feature9Cols], Ydogs2019))
print('Dog picks matrix:\n',confusion_matrix(Ydogs2019, pred1))
print('')

print("Using 3 game standard predictor: ")
pred2 = ada3.predict(dogs2019[feature9Cols])
print('Dog picks:',ada3.score(dogs2019[feature9Cols], Ydogs2019))
print('Dog picks matrix:\n',confusion_matrix(Ydogs2019, pred2))
print('')

Using dog data[rolling 9 averages]: 

Using 9 game standard predictor: 
Dog picks: 0.6264462809917355
Dog picks matrix:
 [[291 121]
 [105  88]]

Using 3 game standard predictor: 
Dog picks: 0.6396694214876033
Dog picks matrix:
 [[305 107]
 [111  82]]



In [35]:
odds = []
for i, n in enumerate(pred1):
    if n == 1:
        odds.append(dogs2019['Odds'][i])

print("Average odds:", statistics.mean(odds))
print("Median odds:", statistics.median(odds))

odds = []
for i, n in enumerate(pred2):
    if n == 1:
        odds.append(dogs2019['Odds'][i])

print("Average odds:", statistics.mean(odds))
print("Median odds:", statistics.median(odds))

Average odds: 184
Median odds: 160
Average odds: 191
Median odds: 160


In [36]:
print("Using rolling 3 averages: \n")

print("Using 9 game standard predictor: ")
pred1 = ada9.predict(dogs2019[feature3Cols])
print('Dog picks:',ada9.score(dogs2019[feature3Cols], Ydogs2019))
print('Dog picks matrix:\n',confusion_matrix(Ydogs2019, pred1))
print('')

print("Using 3 game standard predictor: ")
pred2 = ada3.predict(dogs2019[feature3Cols])
print('Dog picks:',ada3.score(dogs2019[feature3Cols], Ydogs2019))
print('Dog picks matrix:\n',confusion_matrix(Ydogs2019, pred2))
print('')


Using rolling 3 averages: 

Using 9 game standard predictor: 
Dog picks: 0.6694214876033058
Dog picks matrix:
 [[309 103]
 [ 97  96]]

Using 3 game standard predictor: 
Dog picks: 0.6677685950413224
Dog picks matrix:
 [[314  98]
 [103  90]]



In [37]:
odds = []
for i, n in enumerate(pred1):
    if n == 1:
        odds.append(dogs2019['Odds'][i])

print("Average odds:", statistics.mean(odds))
print("Median odds:", statistics.median(odds))

odds = []
for i, n in enumerate(pred2):
    if n == 1:
        odds.append(dogs2019['Odds'][i])

print("Average odds:", statistics.mean(odds))
print("Median odds:", statistics.median(odds))

Average odds: 181
Median odds: 170
Average odds: 198
Median odds: 167.5


This model drops 2018 data when adding first half of 2019.

The 9-game predictor goes 85-141 using 3-game average data, with average odds of 229.

The 3-game predictor goes 70-116 using 3-game average data, with average odds of 220.

In [38]:
pkl_filename = "ADA9model.pkl"  
with open(pkl_filename, 'wb') as file:  
    pickle.dump(ada9, file)
    
pkl_filename = "ADA3model.pkl"  
with open(pkl_filename, 'wb') as file:  
    pickle.dump(ada3, file)

In [58]:
model3 = pickle.load(open('GBC3Model.pkl', 'rb'))
model9 = pickle.load(open('GBC9Model.pkl','rb'))



In [59]:
print("Using dog data[rolling 9 averages]: \n")

print("Using 9 game standard predictor: ")
pred1 = model9.predict(dogs2019[feature9Cols])
print('Dog picks:',model9.score(dogs2019[feature9Cols], Ydogs2019))
print('Dog picks matrix:\n',confusion_matrix(Ydogs2019, pred1))
print('')

print("Using 3 game standard predictor: ")
pred2 = model3.predict(dogs2019[feature9Cols])
print('Dog picks:',model3.score(dogs2019[feature9Cols], Ydogs2019))
print('Dog picks matrix:\n',confusion_matrix(Ydogs2019, pred2))
print('')

odds = []
for i, n in enumerate(pred1):
    if n == 1:
        odds.append(dogs2019['Odds'][i])

print("Average odds:", statistics.mean(odds))
print("Median odds:", statistics.median(odds))

odds = []
for i, n in enumerate(pred2):
    if n == 1:
        odds.append(dogs2019['Odds'][i])

print("Average odds:", statistics.mean(odds))
print("Median odds:", statistics.median(odds))


print("\nUsing rolling 3 averages: \n")

print("Using 9 game standard predictor: ")
pred1 = model9.predict(dogs2019[feature3Cols])
print('Dog picks:',model9.score(dogs2019[feature3Cols], Ydogs2019))
print('Dog picks matrix:\n',confusion_matrix(Ydogs2019, pred1))
print('')

print("Using 3 game standard predictor: ")
pred2 = model3.predict(dogs2019[feature3Cols])
print('Dog picks:',model3.score(dogs2019[feature3Cols], Ydogs2019))
print('Dog picks matrix:\n',confusion_matrix(Ydogs2019, pred2))
print('')

odds = []
for i, n in enumerate(pred1):
    if n == 1:
        odds.append(dogs2019['Odds'][i])

print("Average odds:", statistics.mean(odds))
print("Median odds:", statistics.median(odds))

odds = []
for i, n in enumerate(pred2):
    if n == 1:
        odds.append(dogs2019['Odds'][i])

print("Average odds:", statistics.mean(odds))
print("Median odds:", statistics.median(odds))



Using dog data[rolling 9 averages]: 

Using 9 game standard predictor: 
Dog picks: 0.6264462809917355
Dog picks matrix:
 [[309 103]
 [123  70]]

Using 3 game standard predictor: 
Dog picks: 0.6264462809917355
Dog picks matrix:
 [[316  96]
 [130  63]]

Average odds: 191.9364161849711
Median odds: 165.0
Average odds: 192.0251572327044
Median odds: 160.0

Using rolling 3 averages: 

Using 9 game standard predictor: 
Dog picks: 0.6082644628099173
Dog picks matrix:
 [[281 131]
 [106  87]]

Using 3 game standard predictor: 
Dog picks: 0.6099173553719008
Dog picks matrix:
 [[288 124]
 [112  81]]

Average odds: 232.90825688073394
Median odds: 175.0
Average odds: 227.15121951219513
Median odds: 175.0
