# PART 3.2: Model Development

## Guard Model

In [11]:
"""
"""

import os
import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from lightgbm import LGBMRegressor
import xgboost as xgb
from sklearn.metrics import mean_absolute_error

from sklearn.feature_selection import RFE

# Run from the project root so the relative 'Data/...' paths below resolve.
working_directory = 'D:/machine_learning/DFS/NBA/NBA_moredata'
os.chdir(working_directory)
data_dir = 'Data/'
etl_dir = 'Data/ETL/'

# Raw per-game box scores and the pre-built guard feature table.
player_stats = pd.read_csv(data_dir + 'player_stats_all.csv', index_col=0)
g_vs = pd.read_csv(etl_dir + 'g_stats.csv', index_col=0)

g_vs = g_vs.rename(columns={'Opp': 'Defense', 'Team_x': 'Team'})

print(g_vs.columns.tolist())

# Box-score rows for every position label that counts as a guard here.
guard_positions = ['G', 'F-G', 'G-F']
is_guard = player_stats['Pos.'].isin(guard_positions)
g_act_stats = player_stats[is_guard].copy().reset_index(drop=True)

g_act_stats = g_act_stats.fillna(0)

# Keep only the columns that also exist in the feature table.
extra_cols = [col for col in g_act_stats.columns if col not in g_vs.columns]
g_act_stats = g_act_stats.drop(columns=extra_cols)

# Actual fantasy score per player per game.
# NOTE(review): these weights (REB 1.2, BLK/STL 3, TOV -1) look like FanDuel's
# scoring rather than DraftKings' -- confirm which site the target should follow.
g_act_stats['Act_G_DKPts'] = (
    g_act_stats['3P'] * 1
    + g_act_stats['AST'] * 1.5
    + g_act_stats['BLK'] * 3
    + g_act_stats['FG'] * 2
    + g_act_stats['FT'] * 1
    + g_act_stats['TRB'] * 1.2
    + g_act_stats['STL'] * 3
    + g_act_stats['TOV'] * -1
)

# Rank of each guard's actual score within its (Season, Date) slate; 1 = best.
scores_by_slate = g_act_stats.groupby(['Season', 'Date'])['Act_G_DKPts']
g_act_stats['Act_G_DKPtsRank'] = scores_by_slate.rank(method='min', ascending=False)

# Columns carried over onto the feature table.
keep_cols = ['Season', 'Date', 'Player', 'Act_G_DKPtsRank', 'Act_G_DKPts']

# Attach the actual score and rank to the features. The > 0 filter also removes
# feature rows that found no match in the left merge (their score is NaN).
g_vs_act = g_vs.merge(g_act_stats[keep_cols], how='left', on=['Season', 'Date', 'Player'])
has_positive_score = g_vs_act['Act_G_DKPts'] > 0
g_vs_act = g_vs_act[has_positive_score].reset_index(drop=True)

# Drop repeated column names and turn +/-inf into NaN before persisting.
unique_col_mask = ~g_vs_act.columns.duplicated()
g_vs_act = g_vs_act.loc[:, unique_col_mask]
g_vs_act = g_vs_act.replace([np.inf, -np.inf], np.nan)
g_vs_act.to_csv(etl_dir + 'g_v_def_stats.csv')

# Same-game box score columns (and the target itself) that must never be used
# as model inputs.
dcols = [
    # game context
    'Age', 'at', 'Result', 'GS',
    # shooting
    'FG', 'FGA', 'FG%',
    '2P', '2PA', '2P%',
    '3P', '3PA', '3P%',
    'FT', 'FTA', 'FT%',
    'TS%',
    # rebounds
    'ORB', 'DRB', 'TRB',
    # remaining counting stats
    'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
    # composite ratings
    'GmSc', 'BPM',
    # identifiers / calendar
    'Pos.', 'Month', 'Year', 'Team_y',
    # target
    'Act_G_DKPts',
    'EFF', 'MP',
]

# Identifier columns kept through the split (for the prediction report) and
# removed from the feature matrices afterwards.
more_dcols = [
    'Season', 'Date', 'Team', 'Defense', 'Player',
    'Act_G_DKPtsRank',
]

# TODO(review): de-duplication is currently disabled -- confirm the feature
# table has at most one row per player and date before relying on the split.
# g_vs_act.drop_duplicates(subset=['Player', 'Date'], keep='first', inplace = True, ignore_index = True)

# Features: everything except the same-game box score columns.
# Target: the actual fantasy score.
X = g_vs_act.drop(dcols, axis = 1)
Y = g_vs_act['Act_G_DKPts']

from sklearn.model_selection import train_test_split

# Random 75/25 split with a fixed seed so the run is reproducible.
# NOTE(review): rows are per-game observations; a random split mixes dates
# between train and test -- confirm this is acceptable for evaluation.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.25, random_state=42)

# Renumber rows from 0. The results are rebound instead of using inplace=True:
# the split outputs are slices of X/Y, and modifying them in place is what
# raised "A value is trying to be set on a copy of a slice from a DataFrame".
X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
Y_train = Y_train.reset_index(drop=True)
Y_test = Y_test.reset_index(drop=True)

print('Training set size:', len(X_train))
print('Testing set size:', len(X_test))

# Keep identifiers + target of the test rows for the prediction report; this
# must happen before the identifier columns are removed from X_test.
pred_df = pd.concat([X_test, Y_test], axis = 1)

# Strip identifier columns so only numeric model inputs remain.
X_train = X_train.drop(columns=more_dcols)
X_test = X_test.drop(columns=more_dcols)

print('\nNum Possible Features:',len(X_train.columns.tolist()))

['Player', 'Date', 'Age', 'Team', 'at', 'Defense', 'Result', 'GS', 'MP', 'FG', 'FGA', 'FG%', '2P', '2PA', '2P%', '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%', 'TS%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'GmSc', 'BPM', 'Pos.', 'EFF', 'Month', 'Year', 'Season', 'MP3', 'MP_pg3', 'FG3', 'FG_pg3', 'FGA3', 'FGA_pg3', 'FG%_pg3', '2P3', '2P_pg3', '2PA3', '2PA_pg3', '2P%_pg3', '3P3', '3P_pg3', '3PA3', '3PA_pg3', '3P%_pg3', 'FT3', 'FT_pg3', 'FTA3', 'FTA_pg3', 'FT%3', 'FT%_pg3', 'TS%_pg3', 'ORB3', 'ORB_pg3', 'DRB3', 'DRB_pg3', 'TRB3', 'TRB_pg3', 'AST3', 'AST_pg3', 'STL3', 'STL_pg3', 'BLK3', 'BLK_pg3', 'TOV3', 'TOV_pg3', 'PF3', 'PF_pg3', 'PTS3', 'PTS_pg3', 'GmSc3', 'GmSc_pg3', 'BPM_pg3', 'EFF3', 'EFF_pg3', 'g_MP3Rank3', 'g_FG3Rank3', 'g_FGARank3', 'g_FG%Rank3', 'g_2PRank3', 'g_2PARank3', 'g_2P%Rank3', 'g_3PRank3', 'g_3PARank3', 'g_3P%Rank3', 'g_FTRank3', 'g_FTARank3', 'g_FT%Rank3', 'g_TS%Rank3', 'g_ORBRank3', 'g_DRBRank3', 'g_TRBRank3', 'g_ASTRank3', 'g_STLRank3', 'g_BLKRank3', 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [12]:
# needs hyperparameters
# Each factory returns a fresh, unfitted regressor so a model can be swapped
# in the "MODEL SELECTION" step without touching the rest of the cell.
def lgbm_mod():
    """Return a LightGBM regressor: 1000 trees, learning rate 0.01, all cores, seed 1."""
    return LGBMRegressor(n_estimators=1000, learning_rate=0.01, n_jobs=-1, random_state=1)


def xgb_mod():
    """Return an XGBoost regressor with library defaults and seed 1."""
    return xgb.XGBRegressor(random_state=1)


def rf_mod():
    """Return a random forest regressor: 1000 trees, all cores, seed 1."""
    return RandomForestRegressor(n_estimators=1000, n_jobs=-1, random_state=1)


def sgd_mod():
    """Return an SGD linear regressor (max 100 iterations, tol 1e-3, seed 1)."""
    # won't work with feature importance
    return SGDRegressor(max_iter=100, tol=1e-3, random_state=1)


def svm_mod():
    """Return a support vector regressor with C=1.0 and epsilon=0.2."""
    # won't work with feature importance
    return SVR(C=1.0, epsilon=0.2)


def neigh_mod():
    """Return a k-nearest-neighbours regressor with library defaults."""
    return KNeighborsRegressor()


def dt_mod():
    """Return a single decision tree regressor with seed 0."""
    return DecisionTreeRegressor(random_state=0)

""" MODEL SELECTION """

model = rf_mod()

"""                 """

# Create the output folder up front so joblib.dump cannot fail on a machine
# where it does not exist yet.
os.makedirs('models/RF_models', exist_ok=True)


def _top_features(fitted_model, columns, top_n):
    """Return the `top_n` names from `columns` with the highest importance.

    `fitted_model` must already be fitted on exactly `columns` (same order)
    and expose `feature_importances_`.
    """
    ranked = pd.DataFrame({'attr': list(columns), 'importance': fitted_model.feature_importances_})
    ranked = ranked.sort_values(by='importance', ascending=False).reset_index(drop=True)
    return ranked['attr'][0:top_n].tolist()


#print possible features
print('possible features:', X_train.columns.tolist(), '\n')

# Fit model, make predictions with all features
model.fit(X_train, Y_train)

preds_all = model.predict(X_test)

# Report frame: identifiers + actual score of every test row.
pdf = pred_df[['Season','Date','Team','Defense','Player','Act_G_DKPtsRank','Act_G_DKPts']].copy()

pdf['Pred_G_DKPts_all'] = preds_all
pdf['PredictedallRank'] = pdf.groupby(['Season','Date'])['Pred_G_DKPts_all'].rank(method='min', ascending = False)
temp_df_all = pdf[pdf['PredictedallRank']<=5]

# save the initial model to disk
filename = 'models/RF_models/G_model_allfeats.pkl'
joblib.dump(model, filename)

# get top 50 features
attr50 = _top_features(model, X_train.columns, 50)

# Top-50 model. The estimators are seeded, so fitting twice on the same
# columns reproduces the same model; one fit therefore serves the predictions,
# the saved pickle and the selection of the top 30 features.
model.fit(X_train[attr50], Y_train)
preds50 = model.predict(X_test[attr50])
filename = 'models/RF_models/G_model_50feats.pkl'
joblib.dump(model, filename)
attr30 = _top_features(model, attr50, 30)

# Top-30 model; also yields the top 20 features.
model.fit(X_train[attr30], Y_train)
preds30 = model.predict(X_test[attr30])
filename = 'models/RF_models/G_model_30feats.pkl'
joblib.dump(model, filename)
attr20 = _top_features(model, attr30, 20)

#Perform RFE (recursive feature elimination) using Top 20 Features, To Find Top 10
# RFE fits its own clone of `model`, so `model` itself is left untouched here.
rfe_model = RFE(model, n_features_to_select = 10)
rfe_model.fit(X_train[attr20], Y_train)
# 'importance' holds the RFE rank in this frame: rank 1 marks a selected feature.
dset = pd.DataFrame({'attr':attr20,'importance':rfe_model.ranking_}).sort_values(by='importance', ascending=False).reset_index(drop=True)
cols10 = dset[dset['importance']==1]['attr'].tolist()

print('T50 features', attr50, '\n')
print('T30 features', attr30, '\n')
print('T20 features', attr20, '\n')
print('T10 features',cols10, '\n')

model.fit(X_train[attr20], Y_train)
preds20 = model.predict(X_test[attr20])
filename = 'models/RF_models/G_model_20feats.pkl'
joblib.dump(model, filename)

model.fit(X_train[cols10], Y_train)
preds10 = model.predict(X_test[cols10])
filename = 'models/RF_models/G_model_10feats.pkl'
joblib.dump(model, filename)

# Add each reduced model's prediction and its per-slate rank to the report.
for label, preds in [('50', preds50), ('30', preds30), ('20', preds20), ('10', preds10)]:
    pred_col = 'Pred_G_DKPts_' + label
    pdf[pred_col] = preds
    pdf['Predicted' + label + 'Rank'] = pdf.groupby(['Season','Date'])[pred_col].rank(method='min', ascending = False)
pdf.to_csv(etl_dir + 'g_predictions_medium_50_30_20_10.csv')

# Top-5 predicted players per slate for each feature set.
temp_df50 = pdf[pdf['Predicted50Rank']<=5]
temp_df30 = pdf[pdf['Predicted30Rank']<=5]
temp_df20 = pdf[pdf['Predicted20Rank']<=5]
temp_df10 = pdf[pdf['Predicted10Rank']<=5]

feature_sets = ['all', '50', '30', '20', '10']

# Test-set mean absolute error per feature set, formatted to two decimals.
mae_values = [
    "{:.2f}".format(mean_absolute_error(Y_test, preds))
    for preds in (preds_all, preds50, preds30, preds20, preds10)
]

results_df = pd.DataFrame({'Features' : feature_sets, 'MAE' : mae_values})

# `.style` builds a new Styler on every access, so hiding the index on a
# discarded Styler never affected display(results_df). Display the Styler
# itself. Styler.hide(axis='index') replaces the deprecated hide_index().
results_style = results_df.style
if hasattr(results_style, 'hide'):
    results_style = results_style.hide(axis='index')
else:
    results_style = results_style.hide_index()
display(results_style)

possible features: ['MP3', 'MP_pg3', 'FG3', 'FG_pg3', 'FGA3', 'FGA_pg3', 'FG%_pg3', '2P3', '2P_pg3', '2PA3', '2PA_pg3', '2P%_pg3', '3P3', '3P_pg3', '3PA3', '3PA_pg3', '3P%_pg3', 'FT3', 'FT_pg3', 'FTA3', 'FTA_pg3', 'FT%3', 'FT%_pg3', 'TS%_pg3', 'ORB3', 'ORB_pg3', 'DRB3', 'DRB_pg3', 'TRB3', 'TRB_pg3', 'AST3', 'AST_pg3', 'STL3', 'STL_pg3', 'BLK3', 'BLK_pg3', 'TOV3', 'TOV_pg3', 'PF3', 'PF_pg3', 'PTS3', 'PTS_pg3', 'GmSc3', 'GmSc_pg3', 'BPM_pg3', 'EFF3', 'EFF_pg3', 'g_MP3Rank3', 'g_FG3Rank3', 'g_FGARank3', 'g_FG%Rank3', 'g_2PRank3', 'g_2PARank3', 'g_2P%Rank3', 'g_3PRank3', 'g_3PARank3', 'g_3P%Rank3', 'g_FTRank3', 'g_FTARank3', 'g_FT%Rank3', 'g_TS%Rank3', 'g_ORBRank3', 'g_DRBRank3', 'g_TRBRank3', 'g_ASTRank3', 'g_STLRank3', 'g_BLKRank3', 'g_TOVRank3', 'g_PFRank3', 'g_PTSRank3', 'g_GMScRank3', 'g_BPMRank3', 'g_EFFRank3', 'g_DKPts3', 'g_DKPtsRank3', 'MinutesPlayed', 'MinutesPlayed_pg', 'FieldGoals', 'FieldGoals_pg', 'FieldGoalAttempts', 'FieldGoalAttempts_pg', 'FieldGoalPercentage_pg', 'TwoPoin

Unnamed: 0,Features,MAE
0,all,5.86
1,50,5.86
2,30,5.87
3,20,5.89
4,10,5.95


In [13]:
# Last 40 rows of the guard report, ordered by date and then by actual score
# (highest first), with actual vs. predicted scores side by side.
compare_cols = [
    'Season', 'Date', 'Player', 'Act_G_DKPts',
    'Pred_G_DKPts_all', 'Pred_G_DKPts_50', 'Pred_G_DKPts_30',
    'Pred_G_DKPts_20', 'Pred_G_DKPts_10',
]
pdf[compare_cols].sort_values(by=['Date', 'Act_G_DKPts'], ascending=[True, False]).tail(n=40)

Unnamed: 0,Season,Date,Player,Act_G_DKPts,Pred_G_DKPts_all,Pred_G_DKPts_50,Pred_G_DKPts_30,Pred_G_DKPts_20,Pred_G_DKPts_10
13528,2021,1601,Devin Vassell,25.3,26.2331,25.646,25.7965,25.1393,24.5944
5422,2021,1601,Corey Kispert,25.1,23.1445,23.573,23.0918,22.3001,22.3608
35042,2021,1601,Cody Martin,23.5,21.7097,22.3688,21.9747,21.9767,22.1431
19720,2021,1601,Terry Taylor,22.9,22.9955,22.5435,22.3111,22.5245,20.9963
19266,2021,1601,Ben McLemore,22.8,20.2486,20.3732,19.6007,18.8401,18.2162
17789,2021,1601,Torrey Craig,22.6,21.9244,20.7708,20.4662,19.9098,19.8797
25334,2021,1601,Jose Alvarado,21.2,19.1533,18.7739,17.3689,16.3237,17.2155
37249,2021,1601,Matisse Thybulle,21.1,19.7109,17.7875,18.4732,18.3012,18.5992
24249,2021,1601,Lance Stephenson,20.7,19.7618,19.562,19.492,19.1239,19.1919
2330,2021,1601,Isaiah Thomas,19.7,13.7757,13.8351,14.638,11.384,10.8947


## Forward Model

In [14]:
"""
"""

import os
import pickle
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from lightgbm import LGBMRegressor
import xgboost as xgb
from sklearn.metrics import mean_absolute_error

from sklearn.feature_selection import RFE

# Run from the project root so the relative 'Data/...' paths below resolve.
working_directory = 'D:/machine_learning/DFS/NBA/NBA_moredata'
os.chdir(working_directory)
data_dir = 'Data/'
etl_dir = 'Data/ETL/'

# Raw per-game box scores and the pre-built forward feature table.
player_stats = pd.read_csv(data_dir + 'player_stats_all.csv', index_col = 0)
f_vs = pd.read_csv(etl_dir + 'f_stats.csv', index_col = 0)

f_vs = f_vs.rename(columns={'Opp':'Defense', 'Team_x' : 'Team'})

print(f_vs.columns.tolist())

#Grab Only Necessary Columns & Filter Only To F Data
f_act_stats = player_stats[(player_stats['Pos.']=='F') | (player_stats['Pos.']=='F-G') | (player_stats['Pos.']=='G-F') | (player_stats['Pos.']=='F-C')].copy().reset_index(drop=True)

f_act_stats = f_act_stats.fillna(0)

# Keep only the columns that also exist in the feature table.
f_act_stats.drop(list(set(f_act_stats.columns) - set(f_vs)), axis = 1, inplace = True)

#Calculate The Draftkings Points for each player on each date
# NOTE(review): these weights (REB 1.2, BLK/STL 3, TOV -1) look like FanDuel's
# scoring rather than DraftKings' -- confirm which site the target should follow.
f_act_stats['Act_F_DKPts'] = (f_act_stats['3P'] * 1 + f_act_stats['AST'] * 1.5 +\
                           f_act_stats['BLK'] * 3 + f_act_stats['FG'] * 2 +\
                           f_act_stats['FT'] * 1 + f_act_stats['TRB'] * 1.2 +\
                           f_act_stats['STL'] * 3 + f_act_stats['TOV'] * -1)

#F DK PTS Rank For The Given Season & Date Pair (1 = highest score)
f_act_stats['Act_F_DKPtsRank'] = f_act_stats.groupby(['Season','Date'])['Act_F_DKPts'].rank(method='min', ascending = False)

#Columns We Want To Add To Dataset
keep_cols = ['Season','Date','Player','Act_F_DKPtsRank','Act_F_DKPts']

#Append Actual DK Pts Rank & DK Pts
# The > 0 filter also removes feature rows that found no match in the left
# merge (their score is NaN).
f_vs_act = pd.merge(f_vs, f_act_stats[keep_cols], how = 'left', on = ['Season','Date','Player'])
f_vs_act = f_vs_act[f_vs_act['Act_F_DKPts']>0].reset_index(drop=True)

#Make sure we have no duplicated columns or infinity errors
f_vs_act = f_vs_act.loc[:,~f_vs_act.columns.duplicated()]
f_vs_act = f_vs_act.replace([np.inf, -np.inf], np.nan)
# Write to the forward file: the previous 'g_v_def_stats.csv' name was carried
# over from the guard section and overwrote the guard table.
f_vs_act.to_csv(etl_dir + 'f_v_def_stats.csv')

# Same-game box score columns (and the target itself) that must never be used
# as model inputs.
dcols = [
    # game context
    'Age', 'at', 'Result', 'GS',
    # shooting
    'FG', 'FGA', 'FG%',
    '2P', '2PA', '2P%',
    '3P', '3PA', '3P%',
    'FT', 'FTA', 'FT%',
    'TS%',
    # rebounds
    'ORB', 'DRB', 'TRB',
    # remaining counting stats
    'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
    # composite ratings
    'GmSc', 'BPM',
    # identifiers / calendar
    'Pos.', 'Month', 'Year', 'Team_y',
    # target
    'Act_F_DKPts',
    'EFF', 'MP',
]

# Identifier columns kept through the split (for the prediction report) and
# removed from the feature matrices afterwards.
more_dcols = [
    'Season', 'Date', 'Team', 'Defense', 'Player',
    'Act_F_DKPtsRank',
]

# NOTE(review): de-duplication is currently disabled, yet the forward
# prediction table printed further down lists the same player/date row twice.
# Confirm whether the feature table should be de-duplicated before splitting.
# f_vs_act.drop_duplicates(subset=['Player', 'Date'], keep='first', inplace = True, ignore_index = True)

# Features: everything except the same-game box score columns.
# Target: the actual fantasy score.
X = f_vs_act.drop(dcols, axis = 1)
Y = f_vs_act['Act_F_DKPts']

from sklearn.model_selection import train_test_split

# Random 75/25 split with a fixed seed so the run is reproducible.
# NOTE(review): rows are per-game observations; a random split mixes dates
# between train and test -- confirm this is acceptable for evaluation.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.25, random_state=42)

# Renumber rows from 0. The results are rebound instead of using inplace=True:
# the split outputs are slices of X/Y, and modifying them in place is what
# raised "A value is trying to be set on a copy of a slice from a DataFrame".
X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
Y_train = Y_train.reset_index(drop=True)
Y_test = Y_test.reset_index(drop=True)

print('Training set size:', len(X_train))
print('Testing set size:', len(X_test))

# Keep identifiers + target of the test rows for the prediction report; this
# must happen before the identifier columns are removed from X_test.
pred_df = pd.concat([X_test, Y_test], axis = 1)

# Strip identifier columns so only numeric model inputs remain.
X_train = X_train.drop(columns=more_dcols)
X_test = X_test.drop(columns=more_dcols)

print('\nNum Possible Features:',len(X_train.columns.tolist()))

['Player', 'Date', 'Age', 'Team', 'at', 'Defense', 'Result', 'GS', 'MP', 'FG', 'FGA', 'FG%', '2P', '2PA', '2P%', '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%', 'TS%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'GmSc', 'BPM', 'Pos.', 'EFF', 'Month', 'Year', 'Season', 'MP3', 'MP_pg3', 'FG3', 'FG_pg3', 'FGA3', 'FGA_pg3', 'FG%_pg3', '2P3', '2P_pg3', '2PA3', '2PA_pg3', '2P%_pg3', '3P3', '3P_pg3', '3PA3', '3PA_pg3', '3P%_pg3', 'FT3', 'FT_pg3', 'FTA3', 'FTA_pg3', 'FT%3', 'FT%_pg3', 'TS%_pg3', 'ORB3', 'ORB_pg3', 'DRB3', 'DRB_pg3', 'TRB3', 'TRB_pg3', 'AST3', 'AST_pg3', 'STL3', 'STL_pg3', 'BLK3', 'BLK_pg3', 'TOV3', 'TOV_pg3', 'PF3', 'PF_pg3', 'PTS3', 'PTS_pg3', 'GmSc3', 'GmSc_pg3', 'BPM_pg3', 'EFF3', 'EFF_pg3', 'f_MP3Rank3', 'f_FG3Rank3', 'f_FGARank3', 'f_FG%Rank3', 'f_2PRank3', 'f_2PARank3', 'f_2P%Rank3', 'f_3PRank3', 'f_3PARank3', 'f_3P%Rank3', 'f_FTRank3', 'f_FTARank3', 'f_FT%Rank3', 'f_TS%Rank3', 'f_ORBRank3', 'f_DRBRank3', 'f_TRBRank3', 'f_ASTRank3', 'f_STLRank3', 'f_BLKRank3', 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [15]:
# needs hyperparameters
# Each factory returns a fresh, unfitted regressor so a model can be swapped
# in the "MODEL SELECTION" step without touching the rest of the cell.
def lgbm_mod():
    """Return a LightGBM regressor: 1000 trees, learning rate 0.01, all cores, seed 1."""
    return LGBMRegressor(n_estimators=1000, learning_rate=0.01, n_jobs=-1, random_state=1)


def xgb_mod():
    """Return an XGBoost regressor with library defaults and seed 1."""
    return xgb.XGBRegressor(random_state=1)


def rf_mod():
    """Return a random forest regressor: 1000 trees, all cores, seed 1."""
    return RandomForestRegressor(n_estimators=1000, n_jobs=-1, random_state=1)


def sgd_mod():
    """Return an SGD linear regressor (max 100 iterations, tol 1e-3, seed 1)."""
    # won't work with feature importance
    return SGDRegressor(max_iter=100, tol=1e-3, random_state=1)


def svm_mod():
    """Return a support vector regressor with C=1.0 and epsilon=0.2."""
    # won't work with feature importance
    return SVR(C=1.0, epsilon=0.2)


def neigh_mod():
    """Return a k-nearest-neighbours regressor with library defaults."""
    return KNeighborsRegressor()


def dt_mod():
    """Return a single decision tree regressor with seed 0."""
    return DecisionTreeRegressor(random_state=0)

""" MODEL SELECTION """

model = rf_mod()

"""                 """

# joblib persists the models below, but this section's import cell only brings
# in pickle; import it here so the Forward section runs on a fresh kernel.
import joblib

# Create the output folder up front so joblib.dump cannot fail on a machine
# where it does not exist yet.
os.makedirs('models/RF_models', exist_ok=True)


def _top_features(fitted_model, columns, top_n):
    """Return the `top_n` names from `columns` with the highest importance.

    `fitted_model` must already be fitted on exactly `columns` (same order)
    and expose `feature_importances_`.
    """
    ranked = pd.DataFrame({'attr': list(columns), 'importance': fitted_model.feature_importances_})
    ranked = ranked.sort_values(by='importance', ascending=False).reset_index(drop=True)
    return ranked['attr'][0:top_n].tolist()


#print possible features
print('possible features:', X_train.columns.tolist(), '\n')

# Fit model, make predictions with all features
model.fit(X_train, Y_train)

preds_all = model.predict(X_test)

# Report frame: identifiers + actual score of every test row.
pdf = pred_df[['Season','Date','Team','Defense','Player','Act_F_DKPtsRank','Act_F_DKPts']].copy()

pdf['Pred_F_DKPts_all'] = preds_all
pdf['PredictedallRank'] = pdf.groupby(['Season','Date'])['Pred_F_DKPts_all'].rank(method='min', ascending = False)
temp_df_all = pdf[pdf['PredictedallRank']<=5]

# save the initial model to disk
filename = 'models/RF_models/F_model_allfeats.pkl'
joblib.dump(model, filename)

# get top 50 features
attr50 = _top_features(model, X_train.columns, 50)

# Top-50 model. The estimators are seeded, so fitting twice on the same
# columns reproduces the same model; one fit therefore serves the predictions,
# the saved pickle and the selection of the top 30 features.
model.fit(X_train[attr50], Y_train)
preds50 = model.predict(X_test[attr50])
filename = 'models/RF_models/F_model_50feats.pkl'
joblib.dump(model, filename)
attr30 = _top_features(model, attr50, 30)

# Top-30 model; also yields the top 20 features.
model.fit(X_train[attr30], Y_train)
preds30 = model.predict(X_test[attr30])
filename = 'models/RF_models/F_model_30feats.pkl'
joblib.dump(model, filename)
attr20 = _top_features(model, attr30, 20)

#Perform RFE (recursive feature elimination) using Top 20 Features, To Find Top 10
# RFE fits its own clone of `model`, so `model` itself is left untouched here.
rfe_model = RFE(model, n_features_to_select = 10)
rfe_model.fit(X_train[attr20], Y_train)
# 'importance' holds the RFE rank in this frame: rank 1 marks a selected feature.
dset = pd.DataFrame({'attr':attr20,'importance':rfe_model.ranking_}).sort_values(by='importance', ascending=False).reset_index(drop=True)
cols10 = dset[dset['importance']==1]['attr'].tolist()


print('T50 features', attr50, '\n')
print('T30 features', attr30, '\n')
print('T20 features', attr20, '\n')
print('T10 features',cols10, '\n')

model.fit(X_train[attr20], Y_train)
preds20 = model.predict(X_test[attr20])
filename = 'models/RF_models/F_model_20feats.pkl'
joblib.dump(model, filename)

model.fit(X_train[cols10], Y_train)
preds10 = model.predict(X_test[cols10])
filename = 'models/RF_models/F_model_10feats.pkl'
joblib.dump(model, filename)

# Add each reduced model's prediction and its per-slate rank to the report.
for label, preds in [('50', preds50), ('30', preds30), ('20', preds20), ('10', preds10)]:
    pred_col = 'Pred_F_DKPts_' + label
    pdf[pred_col] = preds
    pdf['Predicted' + label + 'Rank'] = pdf.groupby(['Season','Date'])[pred_col].rank(method='min', ascending = False)
# NOTE(review): the file name mentions a 15-feature set that is not produced
# here; kept unchanged in case other notebooks read this path.
pdf.to_csv(etl_dir + 'f_predictions_medium_50_30_20_15_10.csv')

# Top-5 predicted players per slate for each feature set.
temp_df50 = pdf[pdf['Predicted50Rank']<=5]
temp_df30 = pdf[pdf['Predicted30Rank']<=5]
temp_df20 = pdf[pdf['Predicted20Rank']<=5]
temp_df10 = pdf[pdf['Predicted10Rank']<=5]

feature_sets = ['all', '50', '30', '20', '10']

# Test-set mean absolute error per feature set, formatted to two decimals.
mae_values = [
    "{:.2f}".format(mean_absolute_error(Y_test, preds))
    for preds in (preds_all, preds50, preds30, preds20, preds10)
]

results_df = pd.DataFrame({'Features' : feature_sets, 'MAE' : mae_values})

# `.style` builds a new Styler on every access, so hiding the index on a
# discarded Styler never affected display(results_df). Display the Styler
# itself. Styler.hide(axis='index') replaces the deprecated hide_index().
results_style = results_df.style
if hasattr(results_style, 'hide'):
    results_style = results_style.hide(axis='index')
else:
    results_style = results_style.hide_index()
display(results_style)

possible features: ['MP3', 'MP_pg3', 'FG3', 'FG_pg3', 'FGA3', 'FGA_pg3', 'FG%_pg3', '2P3', '2P_pg3', '2PA3', '2PA_pg3', '2P%_pg3', '3P3', '3P_pg3', '3PA3', '3PA_pg3', '3P%_pg3', 'FT3', 'FT_pg3', 'FTA3', 'FTA_pg3', 'FT%3', 'FT%_pg3', 'TS%_pg3', 'ORB3', 'ORB_pg3', 'DRB3', 'DRB_pg3', 'TRB3', 'TRB_pg3', 'AST3', 'AST_pg3', 'STL3', 'STL_pg3', 'BLK3', 'BLK_pg3', 'TOV3', 'TOV_pg3', 'PF3', 'PF_pg3', 'PTS3', 'PTS_pg3', 'GmSc3', 'GmSc_pg3', 'BPM_pg3', 'EFF3', 'EFF_pg3', 'f_MP3Rank3', 'f_FG3Rank3', 'f_FGARank3', 'f_FG%Rank3', 'f_2PRank3', 'f_2PARank3', 'f_2P%Rank3', 'f_3PRank3', 'f_3PARank3', 'f_3P%Rank3', 'f_FTRank3', 'f_FTARank3', 'f_FT%Rank3', 'f_TS%Rank3', 'f_ORBRank3', 'f_DRBRank3', 'f_TRBRank3', 'f_ASTRank3', 'f_STLRank3', 'f_BLKRank3', 'f_TOVRank3', 'f_PFRank3', 'f_PTSRank3', 'f_GMScRank3', 'f_BPMRank3', 'f_EFFRank3', 'f_DKPts3', 'f_DKPtsRank3', 'MinutesPlayed', 'MinutesPlayed_pg', 'FieldGoals', 'FieldGoals_pg', 'FieldGoalAttempts', 'FieldGoalAttempts_pg', 'FieldGoalPercentage_pg', 'TwoPoin

Unnamed: 0,Features,MAE
0,all,5.05
1,50,5.06
2,30,5.07
3,20,5.08
4,10,5.14


In [16]:
# First 60 rows of the forward report, ordered by date and then by actual
# score (highest first).
sort_keys = ['Date', 'Act_F_DKPts']
pdf.sort_values(by=sort_keys, ascending=[True, False]).head(n=60)

Unnamed: 0,Season,Date,Team,Defense,Player,Act_F_DKPtsRank,Act_F_DKPts,Pred_F_DKPts_all,PredictedallRank,Pred_F_DKPts_50,Predicted50Rank,Pred_F_DKPts_30,Predicted30Rank,Pred_F_DKPts_20,Predicted20Rank,Pred_F_DKPts_10,Predicted10Rank
24645,2012,0,CLE,WAS,Tristan Thompson,10.0,32.5,28.1215,2.0,28.6264,2.0,28.5285,2.0,27.7078,2.0,27.0888,1.0
2124,2012,0,DAL,LAL,Elton Brand,13.0,30.2,17.4478,10.0,17.5896,10.0,18.9772,10.0,16.550814,9.0,12.4844,10.0
27911,2012,0,DAL,LAL,Elton Brand,13.0,30.2,17.4478,10.0,17.5896,10.0,18.9772,10.0,16.550814,9.0,12.4844,10.0
2955,2012,0,BOS,MIA,Brandon Bass,15.0,28.7,22.6983,5.0,20.5475,6.0,19.4515,7.0,15.3479,11.0,15.7291,8.0
21755,2012,0,DAL,LAL,Brandan Wright,18.0,26.0,24.4483,3.0,23.7533,4.0,23.9304,3.0,23.5743,3.0,22.4724,3.0
40267,2012,0,LAL,DAL,Kobe Bryant,21.0,24.2,22.5621,7.0,22.6763,5.0,22.2269,5.0,23.2501,4.0,21.3985,5.0
9370,2012,0,MIA,BOS,Rashard Lewis,25.0,20.5,19.0338,8.0,20.0176,7.0,23.6579,4.0,22.591933,6.0,22.1983,4.0
22673,2012,0,DAL,LAL,Jae Crowder,28.0,17.6,22.5749,6.0,19.9762,8.0,19.3765,8.0,18.862533,8.0,12.4751,12.0
29657,2012,0,LAL,DAL,Antawn Jamison,29.0,17.0,38.0305,1.0,45.1557,1.0,41.5415,1.0,44.6133,1.0,25.4246,2.0
37554,2012,0,CLE,WAS,Tyler Zeller,32.0,13.4,16.0459,12.0,15.9119,12.0,16.6822,12.0,15.2579,12.0,14.6966,9.0


## Center Model

In [17]:
"""
"""

import os
import pickle
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from lightgbm import LGBMRegressor
import xgboost as xgb
from sklearn.metrics import mean_absolute_error

from sklearn.feature_selection import RFE

# Run from the project root so the relative 'Data/...' paths below resolve.
working_directory = 'D:/machine_learning/DFS/NBA/NBA_moredata'
os.chdir(working_directory)
data_dir = 'Data/'
etl_dir = 'Data/ETL/'

# Raw per-game box scores and the pre-built center feature table.
player_stats = pd.read_csv(data_dir + 'player_stats_all.csv', index_col = 0)
c_vs = pd.read_csv(etl_dir + 'c_stats.csv', index_col = 0)

c_vs = c_vs.rename(columns={'Opp':'Defense', 'Team_x' : 'Team'})

print(c_vs.columns.tolist())

#Grab Only Necessary Columns & Filter Only To C Data
c_act_stats = player_stats[(player_stats['Pos.']=='C') | (player_stats['Pos.']=='C-F') | (player_stats['Pos.']=='F-C')].copy().reset_index(drop=True)

c_act_stats = c_act_stats.fillna(0)

# Keep only the columns that also exist in the feature table.
c_act_stats.drop(list(set(c_act_stats.columns) - set(c_vs)), axis = 1, inplace = True)

#Calculate The Draftkings Points for each player on each date
# NOTE(review): these weights (REB 1.2, BLK/STL 3, TOV -1) look like FanDuel's
# scoring rather than DraftKings' -- confirm which site the target should follow.
c_act_stats['Act_C_DKPts'] = (c_act_stats['3P'] * 1 + c_act_stats['AST'] * 1.5 +\
                           c_act_stats['BLK'] * 3 + c_act_stats['FG'] * 2 +\
                           c_act_stats['FT'] * 1 + c_act_stats['TRB'] * 1.2 +\
                           c_act_stats['STL'] * 3 + c_act_stats['TOV'] * -1)

#C DK PTS Rank For The Given Season & Date Pair (1 = highest score)
c_act_stats['Act_C_DKPtsRank'] = c_act_stats.groupby(['Season','Date'])['Act_C_DKPts'].rank(method='min', ascending = False)

#Columns We Want To Add To Dataset
keep_cols = ['Season','Date','Player','Act_C_DKPtsRank','Act_C_DKPts']

#Append Actual DK Pts Rank & DK Pts
# The > 0 filter also removes feature rows that found no match in the left
# merge (their score is NaN).
c_vs_act = pd.merge(c_vs, c_act_stats[keep_cols], how = 'left', on = ['Season','Date','Player'])
c_vs_act = c_vs_act[c_vs_act['Act_C_DKPts']>0].reset_index(drop=True)

#Make sure we have no duplicated columns or infinity errors
c_vs_act = c_vs_act.loc[:,~c_vs_act.columns.duplicated()]
c_vs_act = c_vs_act.replace([np.inf, -np.inf], np.nan)
# Write to the center file: the previous 'g_v_def_stats.csv' name was carried
# over from the guard section and overwrote the guard table.
c_vs_act.to_csv(etl_dir + 'c_v_def_stats.csv')

# Same-game box score columns (and the target itself) that must never be used
# as model inputs.
dcols = [
    # game context
    'Age', 'at', 'Result', 'GS',
    # shooting
    'FG', 'FGA', 'FG%',
    '2P', '2PA', '2P%',
    '3P', '3PA', '3P%',
    'FT', 'FTA', 'FT%',
    'TS%',
    # rebounds
    'ORB', 'DRB', 'TRB',
    # remaining counting stats
    'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
    # composite ratings
    'GmSc', 'BPM',
    # identifiers / calendar
    'Pos.', 'Month', 'Year', 'Team_y',
    # target
    'Act_C_DKPts',
    'EFF', 'MP',
]

# Identifier columns kept through the split (for the prediction report) and
# removed from the feature matrices afterwards.
more_dcols = [
    'Season', 'Date', 'Team', 'Defense', 'Player',
    'Act_C_DKPtsRank',
]

# TODO(review): de-duplication is currently disabled -- confirm the feature
# table has at most one row per player and date before relying on the split.
# c_vs_act.drop_duplicates(subset=['Player', 'Date'], keep='first', inplace = True, ignore_index = True)

# Features: everything except the same-game box score columns.
# Target: the actual fantasy score.
X = c_vs_act.drop(dcols, axis = 1)
Y = c_vs_act['Act_C_DKPts']

from sklearn.model_selection import train_test_split

# Random 75/25 split with a fixed seed so the run is reproducible.
# NOTE(review): rows are per-game observations; a random split mixes dates
# between train and test -- confirm this is acceptable for evaluation.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.25, random_state=42)

# Renumber rows from 0. The results are rebound instead of using inplace=True:
# the split outputs are slices of X/Y, and modifying them in place is what
# raised "A value is trying to be set on a copy of a slice from a DataFrame".
X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
Y_train = Y_train.reset_index(drop=True)
Y_test = Y_test.reset_index(drop=True)

print('Training set size:', len(X_train))
print('Testing set size:', len(X_test))

# Keep identifiers + target of the test rows for the prediction report; this
# must happen before the identifier columns are removed from X_test.
pred_df = pd.concat([X_test, Y_test], axis = 1)

# Strip identifier columns so only numeric model inputs remain.
X_train = X_train.drop(columns=more_dcols)
X_test = X_test.drop(columns=more_dcols)

print('\nNum Possible Features:',len(X_train.columns.tolist()))

['Player', 'Date', 'Age', 'Team', 'at', 'Defense', 'Result', 'GS', 'MP', 'FG', 'FGA', 'FG%', '2P', '2PA', '2P%', '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%', 'TS%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'GmSc', 'BPM', 'Pos.', 'EFF', 'Month', 'Year', 'Season', 'MP3', 'MP_pg3', 'FG3', 'FG_pg3', 'FGA3', 'FGA_pg3', 'FG%_pg3', '2P3', '2P_pg3', '2PA3', '2PA_pg3', '2P%_pg3', '3P3', '3P_pg3', '3PA3', '3PA_pg3', '3P%_pg3', 'FT3', 'FT_pg3', 'FTA3', 'FTA_pg3', 'FT%3', 'FT%_pg3', 'TS%_pg3', 'ORB3', 'ORB_pg3', 'DRB3', 'DRB_pg3', 'TRB3', 'TRB_pg3', 'AST3', 'AST_pg3', 'STL3', 'STL_pg3', 'BLK3', 'BLK_pg3', 'TOV3', 'TOV_pg3', 'PF3', 'PF_pg3', 'PTS3', 'PTS_pg3', 'GmSc3', 'GmSc_pg3', 'BPM_pg3', 'EFF3', 'EFF_pg3', 'c_MP3Rank3', 'c_FG3Rank3', 'c_FGARank3', 'c_FG%Rank3', 'c_2PRank3', 'c_2PARank3', 'c_2P%Rank3', 'c_3PRank3', 'c_3PARank3', 'c_3P%Rank3', 'c_FTRank3', 'c_FTARank3', 'c_FT%Rank3', 'c_TS%Rank3', 'c_ORBRank3', 'c_DRBRank3', 'c_TRBRank3', 'c_ASTRank3', 'c_STLRank3', 'c_BLKRank3', 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [18]:
# needs hyperparameters
# Each factory returns a fresh, unfitted regressor so a model can be swapped
# in the "MODEL SELECTION" step without touching the rest of the cell.
def lgbm_mod():
    """Return a LightGBM regressor: 1000 trees, learning rate 0.01, all cores, seed 1."""
    return LGBMRegressor(n_estimators=1000, learning_rate=0.01, n_jobs=-1, random_state=1)


def xgb_mod():
    """Return an XGBoost regressor with library defaults and seed 1."""
    return xgb.XGBRegressor(random_state=1)


def rf_mod():
    """Return a random forest regressor: 1000 trees, all cores, seed 1."""
    return RandomForestRegressor(n_estimators=1000, n_jobs=-1, random_state=1)


def sgd_mod():
    """Return an SGD linear regressor (max 100 iterations, tol 1e-3, seed 1)."""
    # won't work with feature importance
    return SGDRegressor(max_iter=100, tol=1e-3, random_state=1)


def svm_mod():
    """Return a support vector regressor with C=1.0 and epsilon=0.2."""
    # won't work with feature importance
    return SVR(C=1.0, epsilon=0.2)


def neigh_mod():
    """Return a k-nearest-neighbours regressor with library defaults."""
    return KNeighborsRegressor()


def dt_mod():
    """Return a single decision tree regressor with seed 0."""
    return DecisionTreeRegressor(random_state=0)

""" MODEL SELECTION """

model = rf_mod()

"""                 """

#print possible features
print('possible features:', X_train.columns.tolist(), '\n')

# Fit model, make predictions with all features
model.fit(X_train, Y_train)

preds_all = model.predict(X_test)

pdf = pred_df[['Season','Date','Team','Defense','Player','Act_C_DKPtsRank','Act_C_DKPts']].copy()

pdf['Pred_C_DKPts_all'] = preds_all
pdf['PredictedallRank'] = pdf.groupby(['Season','Date'])['Pred_C_DKPts_all'].rank(method='min', ascending = False)
temp_df_all = pdf[pdf['PredictedallRank']<=5]

# save the initial model to disk
filename = 'models/RF_models/C_model_allfeats.pkl'
joblib.dump(model, filename) 

def _rank_features(names, scores):
    """Return an attr/importance frame sorted by descending score with a fresh index."""
    table = pd.DataFrame({'attr': names, 'importance': scores})
    return table.sort_values(by='importance', ascending=False).reset_index(drop=True)


# Top 50: ranked by the importances of the model as it is currently fitted.
dset = _rank_features(X_train.columns.tolist(), model.feature_importances_)
attr50 = dset['attr'].head(50).tolist()

# Top 30: refit on the 50 survivors and re-rank.
model.fit(X_train[attr50], Y_train)
dset = _rank_features(X_train[attr50].columns.tolist(), model.feature_importances_)
attr30 = dset['attr'].head(30).tolist()

# Top 20: refit on the 30 survivors and re-rank.
model.fit(X_train[attr30], Y_train)
dset = _rank_features(X_train[attr30].columns.tolist(), model.feature_importances_)
attr20 = dset['attr'].head(20).tolist()

# Top 10: recursive feature elimination (RFE) over the 20 survivors.
rfe_model = RFE(model, n_features_to_select = 10)
rfe_model.fit(X_train[attr20], Y_train)
# Here the 'importance' column holds RFE ranks rather than importances;
# rank 1 marks the features RFE kept.
dset = _rank_features(X_train[attr20].columns.tolist(), rfe_model.ranking_)
cols10 = dset.loc[dset['importance'] == 1, 'attr'].tolist()

# Report each stage's surviving features.
for _stage_label, _stage_features in (
    ('T50 features', attr50),
    ('T30 features', attr30),
    ('T20 features', attr20),
    ('T10 features', cols10),
):
    print(_stage_label, _stage_features, '\n')

# For each reduced feature set: refit the model, predict on the test rows,
# and save the fitted model. The same estimator object is reused, so after
# this loop `model` is fitted on the 10-feature set and `filename` holds
# the last path written.
_refit_plan = (
    (attr50, 'models/RF_models/C_model_50feats.pkl'),
    (attr30, 'models/RF_models/C_model_30feats.pkl'),
    (attr20, 'models/RF_models/C_model_20feats.pkl'),
    (cols10, 'models/RF_models/C_model_10feats.pkl'),
)
_subset_predictions = []
for _feature_subset, filename in _refit_plan:
    model.fit(X_train[_feature_subset], Y_train)
    _subset_predictions.append(model.predict(X_test[_feature_subset]))
    joblib.dump(model, filename)

preds50, preds30, preds20, preds10 = _subset_predictions

# Attach each reduced-feature prediction and its within-group rank to the
# prediction table. Columns are added as (points, rank) pairs in the order
# 50, 30, 20, 10; ranks are computed per (Season, Date), rank 1 = highest.
_subset_outputs = (
    ('Pred_C_DKPts_50', 'Predicted50Rank', preds50),
    ('Pred_C_DKPts_30', 'Predicted30Rank', preds30),
    ('Pred_C_DKPts_20', 'Predicted20Rank', preds20),
    ('Pred_C_DKPts_10', 'Predicted10Rank', preds10),
)
for _pts_col, _rank_col, _subset_preds in _subset_outputs:
    pdf[_pts_col] = _subset_preds
    pdf[_rank_col] = pdf.groupby(['Season','Date'])[_pts_col].rank(method='min', ascending = False)

# NOTE(review): the file name mentions "15", but no 15-feature set is produced here.
pdf.to_csv(etl_dir + 'c_predictions_medium_50_30_20_15_10.csv')

# Rows ranked in the top five of their group under each feature set.
temp_df50 = pdf.loc[pdf['Predicted50Rank'] <= 5]
temp_df30 = pdf.loc[pdf['Predicted30Rank'] <= 5]
temp_df20 = pdf.loc[pdf['Predicted20Rank'] <= 5]
temp_df10 = pdf.loc[pdf['Predicted10Rank'] <= 5]

# Labels for the five feature sets, in the same order as the predictions below.
feature_sets = ['all', '50', '30', '20', '10']

# Test-set mean absolute error per feature set, kept as two-decimal strings
# so the summary table prints with uniform precision.
mae_values = [
    "{:.2f}".format(mean_absolute_error(Y_test, subset_preds))
    for subset_preds in (preds_all, preds50, preds30, preds20, preds10)
]

results_df = pd.DataFrame({'Features' : feature_sets, 'MAE' : mae_values})

# The former `results_df.style.hide_index()` call was dropped: it built a
# Styler and threw it away, so it never affected what was displayed, and
# Styler.hide_index no longer exists in pandas 2.x (AttributeError).
# To really hide the index, display `results_df.style.hide(axis='index')`
# instead (pandas >= 1.4).
results_df.to_clipboard()
display(results_df)

possible features: ['MP3', 'MP_pg3', 'FG3', 'FG_pg3', 'FGA3', 'FGA_pg3', 'FG%_pg3', '2P3', '2P_pg3', '2PA3', '2PA_pg3', '2P%_pg3', '3P3', '3P_pg3', '3PA3', '3PA_pg3', '3P%_pg3', 'FT3', 'FT_pg3', 'FTA3', 'FTA_pg3', 'FT%3', 'FT%_pg3', 'TS%_pg3', 'ORB3', 'ORB_pg3', 'DRB3', 'DRB_pg3', 'TRB3', 'TRB_pg3', 'AST3', 'AST_pg3', 'STL3', 'STL_pg3', 'BLK3', 'BLK_pg3', 'TOV3', 'TOV_pg3', 'PF3', 'PF_pg3', 'PTS3', 'PTS_pg3', 'GmSc3', 'GmSc_pg3', 'BPM_pg3', 'EFF3', 'EFF_pg3', 'c_MP3Rank3', 'c_FG3Rank3', 'c_FGARank3', 'c_FG%Rank3', 'c_2PRank3', 'c_2PARank3', 'c_2P%Rank3', 'c_3PRank3', 'c_3PARank3', 'c_3P%Rank3', 'c_FTRank3', 'c_FTARank3', 'c_FT%Rank3', 'c_TS%Rank3', 'c_ORBRank3', 'c_DRBRank3', 'c_TRBRank3', 'c_ASTRank3', 'c_STLRank3', 'c_BLKRank3', 'c_TOVRank3', 'c_PFRank3', 'c_PTSRank3', 'c_GMScRank3', 'c_BPMRank3', 'c_EFFRank3', 'c_DKPts3', 'c_DKPtsRank3', 'MinutesPlayed', 'MinutesPlayed_pg', 'FieldGoals', 'FieldGoals_pg', 'FieldGoalAttempts', 'FieldGoalAttempts_pg', 'FieldGoalPercentage_pg', 'TwoPoin

Unnamed: 0,Features,MAE
0,all,5.8
1,50,5.82
2,30,5.83
3,20,5.86
4,10,5.94


In [19]:
# Preview: first 20 rows ordered by Date ascending, then actual points descending.
pdf.sort_values(['Date', 'Act_C_DKPts'], ascending=[True, False]).head(20)

Unnamed: 0,Season,Date,Team,Defense,Player,Act_C_DKPtsRank,Act_C_DKPts,Pred_C_DKPts_all,PredictedallRank,Pred_C_DKPts_50,Predicted50Rank,Pred_C_DKPts_30,Predicted30Rank,Pred_C_DKPts_20,Predicted20Rank,Pred_C_DKPts_10,Predicted10Rank
7237,2012,0,CLE,WAS,Anderson Varejão,1.0,55.1,46.0458,1.0,44.8155,1.0,45.3379,1.0,44.8066,1.0,43.0665,1.0
12620,2012,0,MIA,BOS,Chris Bosh,4.0,40.5,32.7024,2.0,31.0779,2.0,33.5473,2.0,34.5685,2.0,30.3185,2.0
5872,2012,0,LAL,DAL,Dwight Howard,6.0,34.0,21.0378,7.0,18.3358,7.0,18.7834,7.0,19.5316,7.0,16.531,6.0
2108,2012,0,CLE,WAS,Tristan Thompson,7.0,32.5,27.171,3.0,27.7267,3.0,28.3823,3.0,28.266,3.0,26.9876,4.0
3036,2012,0,BOS,MIA,Kevin Garnett,12.0,27.4,24.0978,5.0,23.4916,5.0,24.90896,5.0,24.9947,4.0,22.2327,5.0
9714,2012,0,DAL,LAL,Brandan Wright,14.0,26.0,23.3596,6.0,23.9648,4.0,23.3576,6.0,24.7451,5.0,27.0984,3.0
4850,2012,0,WAS,CLE,Earl Barron,16.0,23.6,25.8738,4.0,22.4206,6.0,26.8794,4.0,21.4286,6.0,12.3218,7.0
10426,2012,1,SAS,NOH,Tim Duncan,5.0,45.7,37.2166,1.0,36.7257,1.0,37.4434,1.0,38.4611,1.0,36.4915,1.0
14431,2012,1,IND,TOR,David West,19.0,31.9,28.9017,4.0,28.7544,4.0,29.7751,2.0,29.7839,2.0,27.633,3.0
15103,2012,1,SAC,CHI,DeMarcus Cousins,19.0,31.9,24.2493,8.0,20.8887,10.0,20.042,11.0,21.2032,9.0,21.2288,7.0
