In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GRU, LSTM
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.linear_model import LinearRegression


In [3]:
random.seed= 30

In [4]:
pd.set_option('display.max_columns', None)

In [5]:
df= pd.read_csv('./data/player_game_data.csv')

In [6]:
#Cleaning up the data
df['GAME_DATE_EST'] = pd.to_datetime(df['GAME_DATE_EST'])
df = df.set_index('GAME_DATE_EST')
df.dropna(inplace= True)
df.sort_index(inplace= True)

In [7]:
df.head()

Unnamed: 0_level_0,GAME_ID,TEAM_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,PLAYER_ID,PLAYER_NAME,SEASON,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TNO,PF,PTS,PLUS_MINUS,PTS_home,FG_PCT_home,FT_PCT_home,FG3_PCT_home,AST_home,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS,SECONDS,FTSY_PTS
GAME_DATE_EST,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1
2015-10-02,11500001,1610612746,1610612746,1610612743,203912,C.J. Wilcox,2015,1.0,2.0,0.5,1.0,2.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,103.0,0.407,0.769,0.281,22.0,39.0,96.0,0.425,0.75,0.389,16.0,57.0,1,446,2.0
2015-10-02,11500001,1610612743,1610612746,1610612743,2749,Jameer Nelson,2015,3.0,8.0,0.375,1.0,3.0,0.333,3.0,4.0,0.75,0.0,3.0,3.0,7.0,0.0,0.0,1.0,3.0,10.0,8.0,103.0,0.407,0.769,0.281,22.0,39.0,96.0,0.425,0.75,0.389,16.0,57.0,1,1141,16.5
2015-10-02,11500001,1610612743,1610612746,1610612743,200751,Randy Foye,2015,2.0,4.0,0.5,2.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,6.0,-16.0,103.0,0.407,0.769,0.281,22.0,39.0,96.0,0.425,0.75,0.389,16.0,57.0,1,1138,3.0
2015-10-02,11500001,1610612743,1610612746,1610612743,201163,Wilson Chandler,2015,5.0,12.0,0.417,1.0,3.0,0.333,0.0,1.0,0.0,1.0,7.0,8.0,2.0,2.0,0.0,0.0,1.0,11.0,-8.0,103.0,0.407,0.769,0.281,22.0,39.0,96.0,0.425,0.75,0.389,16.0,57.0,1,1382,18.0
2015-10-02,11500001,1610612743,1610612746,1610612743,201581,JJ Hickson,2015,1.0,7.0,0.143,0.0,0.0,0.0,0.0,0.0,0.0,1.0,5.0,6.0,0.0,0.0,0.0,3.0,2.0,2.0,-14.0,103.0,0.407,0.769,0.281,22.0,39.0,96.0,0.425,0.75,0.389,16.0,57.0,1,1148,-1.0


In [8]:
df['HOME_PTS_DIF'] = df['PTS_home'] - df['PTS_away']
df['HOME_FG_DIF'] = df['FG_PCT_home'] - df['FG_PCT_away']
df['HOME_FT_DIF'] = df['FT_PCT_home'] - df['FT_PCT_away']
df['HOME_FG3_DIF'] = df['FG3_PCT_home'] - df['FG3_PCT_away']
df['HOME_AST_DIF'] = df['AST_home'] - df['AST_away']
df['HOME_REB_DIF'] = df['REB_home'] - df['REB_away']
df.head()

Unnamed: 0_level_0,GAME_ID,TEAM_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,PLAYER_ID,PLAYER_NAME,SEASON,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TNO,PF,PTS,PLUS_MINUS,PTS_home,FG_PCT_home,FT_PCT_home,FG3_PCT_home,AST_home,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS,SECONDS,FTSY_PTS,HOME_PTS_DIF,HOME_FG_DIF,HOME_FT_DIF,HOME_FG3_DIF,HOME_AST_DIF,HOME_REB_DIF
GAME_DATE_EST,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1
2015-10-02,11500001,1610612746,1610612746,1610612743,203912,C.J. Wilcox,2015,1.0,2.0,0.5,1.0,2.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,103.0,0.407,0.769,0.281,22.0,39.0,96.0,0.425,0.75,0.389,16.0,57.0,1,446,2.0,7.0,-0.018,0.019,-0.108,6.0,-18.0
2015-10-02,11500001,1610612743,1610612746,1610612743,2749,Jameer Nelson,2015,3.0,8.0,0.375,1.0,3.0,0.333,3.0,4.0,0.75,0.0,3.0,3.0,7.0,0.0,0.0,1.0,3.0,10.0,8.0,103.0,0.407,0.769,0.281,22.0,39.0,96.0,0.425,0.75,0.389,16.0,57.0,1,1141,16.5,7.0,-0.018,0.019,-0.108,6.0,-18.0
2015-10-02,11500001,1610612743,1610612746,1610612743,200751,Randy Foye,2015,2.0,4.0,0.5,2.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,6.0,-16.0,103.0,0.407,0.769,0.281,22.0,39.0,96.0,0.425,0.75,0.389,16.0,57.0,1,1138,3.0,7.0,-0.018,0.019,-0.108,6.0,-18.0
2015-10-02,11500001,1610612743,1610612746,1610612743,201163,Wilson Chandler,2015,5.0,12.0,0.417,1.0,3.0,0.333,0.0,1.0,0.0,1.0,7.0,8.0,2.0,2.0,0.0,0.0,1.0,11.0,-8.0,103.0,0.407,0.769,0.281,22.0,39.0,96.0,0.425,0.75,0.389,16.0,57.0,1,1382,18.0,7.0,-0.018,0.019,-0.108,6.0,-18.0
2015-10-02,11500001,1610612743,1610612746,1610612743,201581,JJ Hickson,2015,1.0,7.0,0.143,0.0,0.0,0.0,0.0,0.0,0.0,1.0,5.0,6.0,0.0,0.0,0.0,3.0,2.0,2.0,-14.0,103.0,0.407,0.769,0.281,22.0,39.0,96.0,0.425,0.75,0.389,16.0,57.0,1,1148,-1.0,7.0,-0.018,0.019,-0.108,6.0,-18.0


In [9]:
df.iloc[:,[7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,-2]]

Unnamed: 0_level_0,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TNO,PF,PTS,PLUS_MINUS,HOME_AST_DIF
GAME_DATE_EST,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2015-10-02,1.0,2.0,0.500,1.0,2.0,0.500,0.0,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,6.0
2015-10-02,3.0,8.0,0.375,1.0,3.0,0.333,3.0,4.0,0.750,0.0,3.0,3.0,7.0,0.0,0.0,1.0,3.0,10.0,8.0,6.0
2015-10-02,2.0,4.0,0.500,2.0,2.0,1.000,0.0,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,6.0,-16.0,6.0
2015-10-02,5.0,12.0,0.417,1.0,3.0,0.333,0.0,1.0,0.000,1.0,7.0,8.0,2.0,2.0,0.0,0.0,1.0,11.0,-8.0,6.0
2015-10-02,1.0,7.0,0.143,0.0,0.0,0.000,0.0,0.0,0.000,1.0,5.0,6.0,0.0,0.0,0.0,3.0,2.0,2.0,-14.0,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-26,2.0,4.0,0.500,1.0,2.0,0.500,3.0,4.0,0.750,0.0,3.0,3.0,0.0,0.0,1.0,2.0,2.0,8.0,6.0,-2.0
2021-05-26,2.0,9.0,0.222,0.0,4.0,0.000,0.0,0.0,0.000,0.0,3.0,3.0,1.0,1.0,0.0,1.0,2.0,4.0,8.0,-2.0
2021-05-26,8.0,16.0,0.500,3.0,5.0,0.600,1.0,1.0,1.000,2.0,2.0,4.0,15.0,0.0,0.0,2.0,4.0,20.0,4.0,8.0
2021-05-26,5.0,8.0,0.625,4.0,7.0,0.571,1.0,2.0,0.500,1.0,3.0,4.0,1.0,1.0,0.0,1.0,3.0,15.0,13.0,-2.0


In [10]:
## need to change commented row, also model is using rolling average of fantasy points, this needs to be changed
def player_model():
    # input(Date)
    put = input('Which player would you like to predict?')
    data_df = df.loc[df['PLAYER_NAME'] == put]
    data_df = data_df.iloc[:,[7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,-2]].rolling(window=5).mean()
    data_df['FTSY_PTS']= df.loc[df['PLAYER_NAME']== put]['FTSY_PTS']
    data_df.dropna(inplace =True)
    
    X= data_df.drop(columns = ['FTSY_PTS']).to_numpy()
    y= data_df['FTSY_PTS'].to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= .15, shuffle= False)
    
    ss = StandardScaler()
    X_train_sc = ss.fit_transform(X_train)
    X_test_sc = ss.transform(X_test)
    
    train_seq = TimeseriesGenerator(X_train_sc, y_train, length=1, batch_size=64)
    test_seq = TimeseriesGenerator(X_test_sc, y_test, length=1, batch_size=64)
    
    model = Sequential()

    model.add(LSTM(64, input_shape=(1,20), return_sequences=True))
    model.add(LSTM(16, return_sequences=False))

    model.add(Dense(16, activation= 'relu'))
    model.add(Dropout(.1))

    model.add(Dense(8, activation= 'relu'))
    model.add(Dropout(.1))

    model.add(Dense(1))
    model.compile(optimizer='adam', loss= 'mse', metrics= ['mae'])

    early_stop = EarlyStopping(patience = 5)

    history=model.fit(train_seq, epochs=100, validation_data= test_seq, verbose=1, callbacks = [early_stop])
    
    print(model.predict(test_seq))
    errors = y_test-model.predict(test_seq)
    print(errors)
    df_predicts = pd.DataFrame(model.predict(test_seq))
    print(model.score(test_seq))
    return df_predicts.T

In [11]:
player_model()

Which player would you like to predict? Stephen Curry


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
[[30.372019]
 [30.681364]
 [29.026373]
 [28.14567 ]
 [29.150152]
 [31.222929]
 [29.634775]
 [27.936073]
 [33.24307 ]
 [32.739887]
 [28.933084]
 [31.585466]
 [28.421316]
 [25.496841]
 [26.541178]
 [25.551935]
 [27.855818]
 [24.437187]
 [26.623013]
 [24.361551]
 [27.008776]
 [25.697294]
 [32.215008]
 [32.90305 ]
 [31.901148]
 [34.41495 ]
 [34.90304 ]
 [34.486523]
 [34.812237]
 [34.46798 ]
 [32.788944]
 [32.532497]
 [33.327538]
 [32.04181 ]
 [32.279224]
 [31.678453]
 [29.353

AttributeError: 'Sequential' object has no attribute 'score'

In [56]:
df.loc[df['PLAYER_NAME'] == 'Stephen Curry'].tail(59)

Unnamed: 0_level_0,GAME_ID,TEAM_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,PLAYER_ID,PLAYER_NAME,SEASON,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TNO,PF,PTS,PLUS_MINUS,PTS_home,FG_PCT_home,FT_PCT_home,FG3_PCT_home,AST_home,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS,SECONDS,FTSY_PTS
GAME_DATE_EST,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1
2021-01-04,22000101,1610612744,1610612744,1610612758,201939,Stephen Curry,2020,9.0,18.0,0.5,5.0,12.0,0.417,7.0,7.0,1.0,0.0,9.0,9.0,8.0,0.0,0.0,2.0,3.0,30.0,26.0,137.0,0.526,0.857,0.535,41.0,48.0,106.0,0.378,0.718,0.286,20.0,52.0,1,1834,40.0
2021-01-06,22000116,1610612744,1610612744,1610612746,201939,Stephen Curry,2020,5.0,17.0,0.294,1.0,6.0,0.167,2.0,4.0,0.5,1.0,3.0,4.0,5.0,1.0,0.0,0.0,3.0,13.0,-10.0,101.0,0.453,0.789,0.267,25.0,35.0,108.0,0.444,1.0,0.324,19.0,48.0,0,2099,12.5
2021-01-08,22000130,1610612744,1610612744,1610612746,201939,Stephen Curry,2020,13.0,24.0,0.542,9.0,14.0,0.643,3.0,3.0,1.0,0.0,2.0,2.0,11.0,3.0,0.0,2.0,3.0,38.0,13.0,115.0,0.488,0.833,0.488,25.0,38.0,105.0,0.482,0.846,0.452,23.0,37.0,1,2175,49.5
2021-01-10,22000148,1610612744,1610612744,1610612761,201939,Stephen Curry,2020,2.0,16.0,0.125,1.0,10.0,0.1,6.0,6.0,1.0,3.0,6.0,9.0,6.0,0.0,0.0,5.0,3.0,11.0,7.0,106.0,0.409,0.895,0.283,30.0,51.0,105.0,0.406,0.636,0.342,20.0,48.0,1,2292,10.0
2021-01-12,22000163,1610612744,1610612744,1610612754,201939,Stephen Curry,2020,7.0,17.0,0.412,3.0,8.0,0.375,3.0,5.0,0.6,1.0,3.0,4.0,3.0,2.0,0.0,2.0,3.0,20.0,-2.0,95.0,0.382,0.636,0.342,25.0,46.0,104.0,0.443,0.68,0.321,26.0,50.0,0,2153,18.5
2021-01-14,22000177,1610612744,1610612743,1610612744,201939,Stephen Curry,2020,14.0,23.0,0.609,5.0,11.0,0.455,2.0,2.0,1.0,0.0,11.0,11.0,4.0,2.0,0.0,7.0,2.0,35.0,-21.0,114.0,0.456,0.778,0.297,29.0,50.0,104.0,0.469,0.708,0.379,29.0,43.0,1,2218,40.0
2021-01-18,22000212,1610612744,1610612747,1610612744,201939,Stephen Curry,2020,8.0,22.0,0.364,3.0,12.0,0.25,7.0,7.0,1.0,0.0,1.0,1.0,7.0,1.0,0.0,4.0,2.0,26.0,-3.0,113.0,0.482,0.727,0.31,23.0,50.0,115.0,0.506,0.75,0.324,31.0,32.0,0,2137,21.5
2021-01-20,22000223,1610612744,1610612744,1610612759,201939,Stephen Curry,2020,10.0,17.0,0.588,4.0,8.0,0.5,2.0,2.0,1.0,1.0,10.0,11.0,7.0,1.0,0.0,4.0,2.0,26.0,30.0,121.0,0.505,0.7,0.395,31.0,54.0,99.0,0.372,0.806,0.121,17.0,43.0,1,1888,38.5
2021-01-21,22000228,1610612744,1610612744,1610612752,201939,Stephen Curry,2020,9.0,19.0,0.474,5.0,14.0,0.357,7.0,8.0,0.875,0.0,3.0,3.0,4.0,2.0,0.0,4.0,3.0,30.0,-16.0,104.0,0.384,0.744,0.237,27.0,46.0,119.0,0.463,0.78,0.458,24.0,55.0,0,2079,28.0
2021-01-23,22000242,1610612744,1610612762,1610612744,201939,Stephen Curry,2020,9.0,18.0,0.5,5.0,10.0,0.5,1.0,2.0,0.5,0.0,7.0,7.0,7.0,0.0,0.0,1.0,2.0,24.0,-24.0,127.0,0.479,0.773,0.4,28.0,57.0,108.0,0.452,0.667,0.359,30.0,35.0,1,1626,30.5


In [53]:
errors = y_test-model.predict(test_seq)

In [55]:
print(errors)

[[-2.67947845 -0.67947845  1.12052155 ...  1.82052155  0.72052155
  -3.27947845]
 [-3.57360344 -1.57360344  0.22639656 ...  0.92639656 -0.17360344
  -4.17360344]
 [-3.59280949 -1.59280949  0.20719051 ...  0.90719051 -0.19280949
  -4.19280949]
 ...
 [-0.77758865  1.22241135  3.02241135 ...  3.72241135  2.62241135
  -1.37758865]
 [-2.29109268 -0.29109268  1.50890732 ...  2.20890732  1.10890732
  -2.89109268]
 [-2.20541649 -0.20541649  1.59458351 ...  2.29458351  1.19458351
  -2.80541649]]


In [6]:
# have to write out the function, as of right now the dataframe will not differentiate between players for the rolling avg
data_df = df.iloc[:,[0,4,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,-2, -1]].rolling(window=5).mean()

In [7]:
data_df.head()

Unnamed: 0_level_0,GAME_ID,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TNO,PF,PTS,PLUS_MINUS,SECONDS,FTSY_PTS
GAME_DATE_EST,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2021-05-26,,,,,,,,,,,,,,,,,,,,,,
2021-05-26,,,,,,,,,,,,,,,,,,,,,,
2021-05-26,,,,,,,,,,,,,,,,,,,,,,
2021-05-26,,,,,,,,,,,,,,,,,,,,,,
2021-05-26,42000102.0,3.6,5.6,0.4634,0.4,0.6,0.3,1.2,2.2,0.2266,1.4,3.2,4.6,2.2,0.6,0.8,1.0,1.6,8.8,8.4,1004.2,15.5


In [8]:
data_df.dropna(inplace= True)

In [11]:
X= data_df.drop(columns = ['FTSY_PTS']).to_numpy()
y= data_df['FTSY_PTS'].to_numpy()

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= .15, shuffle= False)

In [13]:
X.shape

(82880, 21)

In [15]:
ss = StandardScaler()
X_train_sc = ss.fit_transform(X_train)
X_test_sc = ss.transform(X_test)

In [16]:
train_seq = TimeseriesGenerator(X_train_sc, y_train, length=5, batch_size=64)
test_seq = TimeseriesGenerator(X_test_sc, y_test, length=5, batch_size=64)

In [None]:
model = Sequential()

#add rnn layer
model.add(GRU(64, input_shape=(5,21), return_sequences=True))
model.add(GRU(16, return_sequences=False))
          
model.add(Dense(16, activation= 'relu'))
model.add(Dropout(.1))

model.add(Dense(8, activation= 'relu'))
model.add(Dropout(.1))

model.add(Dense(1))
model.compile(optimizer='adam', loss= 'mse', metrics= ['mae'])

early_stop = EarlyStopping(patience = 5)

history=model.fit(train_seq, epochs=30, validation_data= test_seq, verbose=1, callbacks = [early_stop])

In [48]:
model.predict(test_seq)

array([[19.841362],
       [17.684353],
       [19.343044],
       ...,
       [16.711882],
       [16.517769],
       [12.845697]], dtype=float32)

In [51]:
errors = y_test-model.predict(test_seq)

mse = np.square(errors).mean()
rmse = np.sqrt(mse)

print(mse)
print(rmse)

In [52]:
print(model.predict(test_seq)[0:1000])

[[19.841362 ]
 [17.684353 ]
 [19.343044 ]
 [17.166016 ]
 [15.139993 ]
 [14.919712 ]
 [18.153759 ]
 [15.137941 ]
 [23.163143 ]
 [24.105335 ]
 [21.408482 ]
 [19.837719 ]
 [15.955694 ]
 [11.50343  ]
 [14.8386965]
 [15.779406 ]
 [15.550947 ]
 [16.456833 ]
 [17.328632 ]
 [15.039159 ]
 [15.364186 ]
 [13.695299 ]
 [14.314484 ]
 [11.779466 ]
 [11.512665 ]
 [17.543655 ]
 [22.711802 ]
 [21.719673 ]
 [20.53285  ]
 [19.919365 ]
 [16.533344 ]
 [14.212252 ]
 [13.689598 ]
 [15.538224 ]
 [12.590355 ]
 [11.242594 ]
 [10.185641 ]
 [10.62919  ]
 [ 9.326572 ]
 [14.101164 ]
 [18.922894 ]
 [18.365925 ]
 [21.209934 ]
 [21.638123 ]
 [22.016663 ]
 [22.765955 ]
 [22.124569 ]
 [21.376816 ]
 [19.70163  ]
 [17.281723 ]
 [13.958823 ]
 [12.421763 ]
 [11.164339 ]
 [10.264442 ]
 [ 9.24349  ]
 [10.628715 ]
 [13.14968  ]
 [14.98452  ]
 [16.428125 ]
 [20.283411 ]
 [19.217712 ]
 [18.783558 ]
 [19.903454 ]
 [15.698248 ]
 [12.119752 ]
 [14.509365 ]
 [14.227555 ]
 [11.738189 ]
 [14.840073 ]
 [15.001234 ]
 [12.413428 ]
 [13.0