### Games include pre and post season

In [35]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GRU, LSTM
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

from sklearn.linear_model import LinearRegression

In [36]:
df = pd.read_csv('./data/player_game_data.csv')


In [37]:
df.head()


Unnamed: 0,GAME_DATE_EST,GAME_ID,TEAM_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,PLAYER_ID,PLAYER_NAME,SEASON,FGM,FGA,...,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS,SECONDS,FTSY_PTS
0,2021-05-26,42000102,1610612755,1610612755,1610612764,2730,Dwight Howard,2020,1.0,4.0,...,45.0,95.0,0.402,0.633,0.091,22.0,40.0,1,957,10.5
1,2021-05-26,42000102,1610612755,1610612755,1610612764,201229,Anthony Tolliver,2020,0.0,0.0,...,45.0,95.0,0.402,0.633,0.091,22.0,40.0,1,104,0.0
2,2021-05-26,42000102,1610612755,1610612755,1610612764,201588,George Hill,2020,2.0,5.0,...,45.0,95.0,0.402,0.633,0.091,22.0,40.0,1,1360,9.0
3,2021-05-26,42000102,1610612755,1610612755,1610612764,201980,Danny Green,2020,2.0,3.0,...,45.0,95.0,0.402,0.633,0.091,22.0,40.0,1,1304,12.0
4,2021-05-26,42000102,1610612755,1610612755,1610612764,202699,Tobias Harris,2020,9.0,13.0,...,45.0,95.0,0.402,0.633,0.091,22.0,40.0,1,1397,26.5


In [38]:
df.columns


Index(['GAME_DATE_EST', 'GAME_ID', 'TEAM_ID', 'HOME_TEAM_ID',
       'VISITOR_TEAM_ID', 'PLAYER_ID', 'PLAYER_NAME', 'SEASON', 'FGM', 'FGA',
       'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB',
       'DREB', 'REB', 'AST', 'STL', 'BLK', 'TNO', 'PF', 'PTS', 'PLUS_MINUS',
       'PTS_home', 'FG_PCT_home', 'FT_PCT_home', 'FG3_PCT_home', 'AST_home',
       'REB_home', 'PTS_away', 'FG_PCT_away', 'FT_PCT_away', 'FG3_PCT_away',
       'AST_away', 'REB_away', 'HOME_TEAM_WINS', 'SECONDS', 'FTSY_PTS'],
      dtype='object')

In [39]:
df['GAME_DATE_EST'] = pd.to_datetime(df['GAME_DATE_EST'])

In [40]:
players = df[['PLAYER_NAME', 'SEASON', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TNO', 'PF', 'PTS', 'PLUS_MINUS', 'SECONDS', 'FTSY_PTS']]

In [41]:
players.fillna(0, inplace = True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(


In [42]:
play_season = players.groupby(['PLAYER_NAME', 'SEASON'], as_index= False)


In [43]:
play_season.mean()

Unnamed: 0,PLAYER_NAME,SEASON,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,...,REB,AST,STL,BLK,TNO,PF,PTS,PLUS_MINUS,SECONDS,FTSY_PTS
0,AJ Hammons,2016,0.653846,1.461538,0.171615,0.096154,0.230769,0.073712,0.173077,0.384615,...,1.153846,0.134615,0.019231,0.307692,0.269231,0.653846,1.576923,0.038462,283.634615,2.298077
1,AJ Hammons,2017,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,Aaron Brooks,2015,2.411111,6.011111,0.318578,0.855556,2.433333,0.257644,0.688889,0.922222,...,1.322222,2.233333,0.411111,0.122222,1.077778,1.666667,6.366667,-0.022222,841.233333,7.194444
3,Aaron Brooks,2016,1.436782,3.643678,0.256851,0.563218,1.563218,0.209759,0.436782,0.540230,...,0.850575,1.494253,0.298851,0.103448,0.793103,1.160920,3.873563,-0.448276,651.632184,4.666667
4,Aaron Brooks,2017,0.352273,0.897727,0.129193,0.125000,0.409091,0.075750,0.113636,0.170455,...,0.193182,0.284091,0.068182,0.000000,0.136364,0.352273,0.943182,-0.886364,145.420455,0.960227
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3673,Zhou Qi,2017,0.195652,0.826087,0.094913,0.086957,0.521739,0.043478,0.173913,0.260870,...,0.608696,0.043478,0.043478,0.391304,0.260870,0.434783,0.652174,-1.413043,189.543478,1.217391
3674,Zhou Qi,2018,0.200000,0.200000,0.200000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.400000,-0.400000,11.400000,0.400000
3675,Zion Williamson,2019,7.656250,12.781250,0.542250,0.218750,0.562500,0.125000,4.250000,6.562500,...,5.500000,1.843750,0.687500,0.281250,2.093750,1.656250,19.781250,2.843750,1456.593750,20.453125
3676,Zion Williamson,2020,9.958333,16.527778,0.607681,0.138889,0.555556,0.118056,5.763889,8.444444,...,7.194444,3.263889,1.013889,0.597222,2.652778,2.152778,25.819444,0.402778,1950.597222,29.229167


In [44]:
curry = df.loc[df['PLAYER_NAME'] == 'Stephen Curry']

In [45]:
#curry_2016 = df.loc[(df['PLAYER_NAME'] == 'Stephen Curry') & (df['SEASON'] == 2016)]


In [46]:
#curry_2016 = curry_2016[['PLAYER_NAME', 'SEASON', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TNO', 'PF', 'PTS', 'PLUS_MINUS', 'SECONDS', 'FTSY_PTS']]

In [47]:
curry.set_index('GAME_DATE_EST', inplace = True)

In [48]:
curry.head()

Unnamed: 0_level_0,GAME_ID,TEAM_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,PLAYER_ID,PLAYER_NAME,SEASON,FGM,FGA,FG_PCT,...,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS,SECONDS,FTSY_PTS
GAME_DATE_EST,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-05-21,52000211,1610612744,1610612744,1610612763,201939,Stephen Curry,2020,13.0,28.0,0.464,...,48.0,117.0,0.408,0.846,0.429,22.0,47.0,0,2843,34.5
2021-05-19,52000121,1610612744,1610612747,1610612744,201939,Stephen Curry,2020,12.0,23.0,0.522,...,49.0,100.0,0.446,0.733,0.441,19.0,46.0,1,2437,30.5
2021-05-16,22001070,1610612744,1610612744,1610612763,201939,Stephen Curry,2020,16.0,36.0,0.444,...,46.0,101.0,0.435,0.789,0.24,25.0,39.0,1,2397,42.5
2021-05-14,22001059,1610612744,1610612744,1610612740,201939,Stephen Curry,2020,,,,...,46.0,122.0,0.465,0.679,0.407,21.0,51.0,1,0,
2021-05-11,22001039,1610612744,1610612744,1610612756,201939,Stephen Curry,2020,7.0,22.0,0.318,...,40.0,116.0,0.519,0.9,0.432,26.0,36.0,1,2191,17.0


In [49]:
curry.dropna(inplace =True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  curry.dropna(inplace =True)


In [50]:
curry.loc[curry['SECONDS']== 0]

Unnamed: 0_level_0,GAME_ID,TEAM_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,PLAYER_ID,PLAYER_NAME,SEASON,FGM,FGA,FG_PCT,...,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS,SECONDS,FTSY_PTS
GAME_DATE_EST,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1


In [51]:
curry.sort_index(inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  curry.sort_index(inplace = True)


In [52]:
X= curry.drop(columns = ['FTSY_PTS', 'PLAYER_NAME'])
y= curry['FTSY_PTS']


In [53]:
X.head()

Unnamed: 0_level_0,GAME_ID,TEAM_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,PLAYER_ID,SEASON,FGM,FGA,FG_PCT,FG3M,...,AST_home,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS,SECONDS
GAME_DATE_EST,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-10-05,11500009,1610612744,1610612744,1610612761,201939,2015,5.0,7.0,0.714,3.0,...,29.0,37.0,87.0,0.376,0.826,0.25,15.0,48.0,1,1167
2015-10-08,11500026,1610612744,1610612757,1610612744,201939,2015,6.0,14.0,0.429,6.0,...,25.0,48.0,101.0,0.383,0.794,0.375,20.0,38.0,1,1547
2015-10-13,11500056,1610612744,1610612744,1610612743,201939,2015,5.0,13.0,0.385,1.0,...,23.0,43.0,114.0,0.495,0.6,0.429,29.0,48.0,0,1586
2015-10-15,11500063,1610612744,1610612744,1610612745,201939,2015,6.0,7.0,0.857,1.0,...,31.0,43.0,101.0,0.365,0.828,0.233,24.0,55.0,1,1002
2015-10-17,11500073,1610612744,1610612747,1610612744,201939,2015,7.0,13.0,0.538,5.0,...,18.0,36.0,70.0,0.387,0.684,0.45,14.0,37.0,1,1344


In [54]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= .25, shuffle= False)

In [55]:
lr= LinearRegression()
lr.fit(X_train, y_train)

LinearRegression()

In [56]:
lr.predict(X_train)

array([14. , 33.5, 13.5, 30. , 19. , 41. , 46.5, 35. , 66.5, 36.5, 34. ,
       49. , 21.5, 23.5, 36.5, 48. , 26. , 38.5, 45. , 29. , 27.5, 34.5,
       47. , 34. , 30.5, 41.5, 41.5, 25. , 39. , 39. , 29.5, 30.5, 39.5,
       28.5, 28.5, 44. , 10. , 32. , 23. , 33.5, 50.5, 26. , 44.5, 26.5,
       40.5, 45. , 42.5, 59. , 44.5, 30.5, 25. , 14. , 50. , 34. , 42.5,
       41.5, 31.5, 30.5, 50. , 41.5, 60. , 47. , 32. , 14. , 51.5, 24. ,
       33. , 34. , 35.5, 36. , 44. , 17. , 33.5, 37.5, 48. , 29. , 37.5,
       30. , 26. , 49.5, 34.5, 38.5, 23. , 40.5, 52. , 31. ,  3.5, 41. ,
       38.5, 32.5, 27.5, 25.5, 13.5, 41. , 40.5, 39. ,  9. , 27. , 15.5,
       38. , 27. , 18.5, 12. , 11.5, 25.5, 23.5, 28. , 19.5, 41.5, 31. ,
       21. , 22. , 24.5, 22.5, 28.5, 28. , 47.5, 29. , 38. , 38. , 37.5,
       26. , 17.5, 33. , 43.5, 30.5, 41. , 20.5, 28.5, 28.5, 32.5, 34. ,
       26. , 15.5, 34.5, 31.5, 18. , 23. , 30. , 22.5, 30. , 17.5, 34.5,
       19.5, 19.5, 31.5, 40. , 32. , 32.5, 30.5, 29

In [57]:
lr.score(X_test, y_test)

1.0

In [67]:
ss = StandardScaler()
X_train_sc = ss.fit_transform(X_train.drop(columns= ['GAME_ID','TEAM_ID','HOME_TEAM_ID','VISITOR_TEAM_ID','SEASON', 'HOME_TEAM_WINS']))
X_test_sc = ss.transform(X_test.drop(columns= ['GAME_ID','TEAM_ID','HOME_TEAM_ID','VISITOR_TEAM_ID','SEASON', 'HOME_TEAM_WINS']))

In [68]:
ss = StandardScaler()
X_train_sc = ss.fit_transform(X_train)
X_test_sc = ss.transform(X_test.drop(columns= ['GAME_ID','TEAM_ID','HOME_TEAM_ID','VISITOR_TEAM_ID','PLAYER_NAME','SEASON', 'HOME_TEAM_WINS']))

KeyError: "['PLAYER_NAME'] not found in axis"

In [69]:
train_seq = TimeseriesGenerator(X_train_sc, y_train, length=5, batch_size=64)
test_seq = TimeseriesGenerator(X_test_sc, y_test, length=5, batch_size=64)

In [70]:
train_seq[0]

(array([[[-1.61547828,  0.        , -0.50130856, ...,  0.71307526,
           0.93173594, -2.61510605],
         [-1.61547607,  0.        ,  1.3796624 , ..., -0.96474888,
           0.93173594, -1.40985909],
         [-1.61547218,  0.        , -0.50130856, ...,  0.71307526,
          -1.07326545, -1.28616269],
         [-1.61547127,  0.        , -0.50130856, ...,  1.88755217,
           0.93173594, -3.13843696],
         [-1.61546998,  0.        , -0.06723834, ..., -1.1325313 ,
           0.93173594, -2.0537147 ]],
 
        [[-1.61547607,  0.        ,  1.3796624 , ..., -0.96474888,
           0.93173594, -1.40985909],
         [-1.61547218,  0.        , -0.50130856, ...,  0.71307526,
          -1.07326545, -1.28616269],
         [-1.61547127,  0.        , -0.50130856, ...,  1.88755217,
           0.93173594, -3.13843696],
         [-1.61546998,  0.        , -0.06723834, ..., -1.1325313 ,
           0.93173594, -2.0537147 ],
         [-1.6154666 ,  0.        , -0.06723834, ...,  2.2231

In [71]:
X_train.shape

(340, 39)

In [72]:
model = Sequential()

#add rnn layer
model.add(GRU(64, input_shape=(5,32), return_sequences=True))
model.add(GRU(16, return_sequences=False))
          
model.add(Dense(16, activation= 'relu'))
model.add(Dropout(.1))
model.add(Dense(8, activation= 'relu'))
model.add(Dropout(.2))
model.add(Dense(1))

model.compile(optimizer='adam', loss= 'mse', metrics= ['mae'])

history=model.fit(train_seq, epochs=30, validation_data= test_seq, verbose=1)

Epoch 1/30


InvalidArgumentError:    Specified a list with shape [?,32] from a tensor with shape [64,39]
	 [[{{node TensorArrayUnstack/TensorListFromTensor}}]]
	 [[sequential_1/gru_2/PartitionedCall]] [Op:__inference_train_function_6567]

Function call stack:
train_function -> train_function -> train_function


In [None]:
model.predict(train_seq)

In [None]:
model.score(test_seq)

In [None]:
model_2 = Sequential()

#add rnn layer
model_2.add(LSTM(64, input_shape=(5,32), return_sequences=True))
model_2.add(LSTM(16, return_sequences=False))
          
model_2.add(Dense(16, activation= 'relu'))
model_2.add(Dropout(.1))
model_2.add(Dense(8, activation= 'relu'))
model_2.add(Dropout(.2))
model_2.add(Dense(1))

model_2.compile(optimizer='adam', loss= 'mse', metrics= ['mae'])

history=model_2.fit(train_seq, epochs=20, validation_data= test_seq, verbose=1)