In [1]:
# DataFrame manipulation libraries:
import pandas as pd
import numpy as np

# Visualization libraries:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Evaluation and processing libraries:
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Keras libraries:
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import Dense, Dropout, GRU
from keras.utils import to_categorical
from keras.optimizers import Adam
from keras.preprocessing.sequence import TimeseriesGenerator

Using TensorFlow backend.


In [2]:
df = pd.read_csv('../../clean_data/weekly_player_performance_2015_to_2018.csv')
df.head()

Unnamed: 0,Date,Rank,PlayerID,Name,Team,Position,Week,Opponent,PassingYards,PassingTouchdowns,PassingInterceptions,RushingYards,RushingTouchdowns,Receptions,ReceivingYards,ReceivingTouchdowns,FumblesLost,FantasyPointsPerGame,FantasyPoints
0,2015-09-13,1,16668,Carlos Hyde,SF,RB,1,MIN,0,0,0,168,2,2,14,0,0,30.2,30.2
1,2015-09-13,2,4314,Tom Brady,NE,QB,1,PIT,288,4,0,1,0,0,0,0,0,27.6,27.62
2,2015-09-13,3,10974,Rob Gronkowski,NE,TE,1,PIT,0,0,0,0,0,5,94,3,0,27.4,27.4
3,2015-09-13,4,13291,Julio Jones,ATL,WR,1,PHI,0,0,0,0,0,9,141,2,0,26.1,26.1
4,2015-09-13,5,2428,Carson Palmer,ARI,QB,1,NO,307,3,0,14,0,0,0,0,0,25.7,25.68


In [3]:
df['Date'] = pd.to_datetime(df['Date'], format = '%Y-%m-%d')
df.dtypes

Date                    datetime64[ns]
Rank                             int64
PlayerID                         int64
Name                            object
Team                            object
Position                        object
Week                             int64
Opponent                        object
PassingYards                     int64
PassingTouchdowns                int64
PassingInterceptions             int64
RushingYards                     int64
RushingTouchdowns                int64
Receptions                       int64
ReceivingYards                   int64
ReceivingTouchdowns              int64
FumblesLost                      int64
FantasyPointsPerGame           float64
FantasyPoints                  float64
dtype: object

In [4]:
# for index, row in df.iterrows():
#     val = df.loc[index, 'Position']
#     # print(val)
#     if val == 'QB':
#         df.loc[index, 'Position'] = 1
#     if val == 'WR':
#         df.loc[index, 'Position'] = 2
#     if val == 'RB':
#         df.loc[index, 'Position'] = 3
#     if val == 'TE':
#         df.loc[index, 'Position'] = 4
#     if val == 'FB':
#         df.loc[index, 'Position'] = 5
# df.head()

In [5]:
df.set_index('Date', inplace=True)
df.sort_index(inplace=True)
df.head()

Unnamed: 0_level_0,Rank,PlayerID,Name,Team,Position,Week,Opponent,PassingYards,PassingTouchdowns,PassingInterceptions,RushingYards,RushingTouchdowns,Receptions,ReceivingYards,ReceivingTouchdowns,FumblesLost,FantasyPointsPerGame,FantasyPoints
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2015-09-13,1,16668,Carlos Hyde,SF,RB,1,MIN,0,0,0,168,2,2,14,0,0,30.2,30.2
2015-09-13,2,4314,Tom Brady,NE,QB,1,PIT,288,4,0,1,0,0,0,0,0,27.6,27.62
2015-09-13,3,10974,Rob Gronkowski,NE,TE,1,PIT,0,0,0,0,0,5,94,3,0,27.4,27.4
2015-09-13,4,13291,Julio Jones,ATL,WR,1,PHI,0,0,0,0,0,9,141,2,0,26.1,26.1
2015-09-13,5,2428,Carson Palmer,ARI,QB,1,NO,307,3,0,14,0,0,0,0,0,25.7,25.68


In [6]:
df_with_dummies = pd.get_dummies(df, columns=['Team', 'Opponent', 'Position'], drop_first=False)
features = [col for col in df_with_dummies.columns if col not in ['Name', 'PlayerID', 'Week']]
df_with_dummies[features] = df_with_dummies[features].astype(float)
X = df_with_dummies[[col for col in df_with_dummies.columns if col != 'FantasyPoints']]
y = df_with_dummies['FantasyPoints']

In [7]:
y[:10]

Date
2015-09-13    30.20
2015-09-13    27.62
2015-09-13    27.40
2015-09-13    26.10
2015-09-13    25.68
2015-09-13    24.96
2015-09-13    23.80
2015-09-13    23.22
2015-09-13    23.06
2015-09-13    23.00
Name: FantasyPoints, dtype: float64

In [8]:
X.dtypes

Rank                    float64
PlayerID                  int64
Name                     object
Week                      int64
PassingYards            float64
PassingTouchdowns       float64
PassingInterceptions    float64
RushingYards            float64
RushingTouchdowns       float64
Receptions              float64
ReceivingYards          float64
ReceivingTouchdowns     float64
FumblesLost             float64
FantasyPointsPerGame    float64
Team_ARI                float64
Team_ATL                float64
Team_BAL                float64
Team_BUF                float64
Team_CAR                float64
Team_CHI                float64
Team_CIN                float64
Team_CLE                float64
Team_DAL                float64
Team_DEN                float64
Team_DET                float64
Team_GB                 float64
Team_HOU                float64
Team_IND                float64
Team_JAX                float64
Team_KC                 float64
                         ...   
Opponent

In [9]:
df_with_dummies.head(10)

Unnamed: 0_level_0,Rank,PlayerID,Name,Week,PassingYards,PassingTouchdowns,PassingInterceptions,RushingYards,RushingTouchdowns,Receptions,...,Opponent_SEA,Opponent_SF,Opponent_TB,Opponent_TEN,Opponent_WAS,Position_FB,Position_QB,Position_RB,Position_TE,Position_WR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-09-13,1.0,16668,Carlos Hyde,1,0.0,0.0,0.0,168.0,2.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2015-09-13,2.0,4314,Tom Brady,1,288.0,4.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2015-09-13,3.0,10974,Rob Gronkowski,1,0.0,0.0,0.0,0.0,0.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2015-09-13,4.0,13291,Julio Jones,1,0.0,0.0,0.0,0.0,0.0,9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2015-09-13,5.0,2428,Carson Palmer,1,307.0,3.0,0.0,14.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2015-09-13,6.0,16763,Marcus Mariota,1,209.0,4.0,0.0,6.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2015-09-13,7.0,14986,DeAndre Hopkins,1,0.0,0.0,0.0,0.0,0.0,9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2015-09-13,8.0,6739,Alex Smith,1,243.0,3.0,0.0,15.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2015-09-13,9.0,2593,Aaron Rodgers,1,189.0,3.0,0.0,35.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2015-09-13,10.0,16608,Austin Seferian-Jenkins,1,0.0,0.0,0.0,0.0,0.0,5.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [11]:
ss = StandardScaler()
X_train_sc = ss.fit_transform(X_train.drop(columns = ['Name', 'PlayerID', 'Week']))
X_test_sc = ss.transform(X_test.drop(columns = ['Name', 'PlayerID', 'Week']))

In [12]:
X_test_sc[0]

array([ 0.40830156, -0.32569876, -0.26948036, -0.21607813,  0.32653258,
       -0.25192244, -0.51896593, -0.4531034 , -0.38336799, -0.20342918,
       -0.50445492, -0.17684817, -0.18644735, -0.18700964, -0.18794355,
       -0.17860924, -0.17487318, -0.17207467, -0.17684817, -0.17606051,
       -0.18093394, -0.17724085, -0.17625772, -0.18112649, -0.17546773,
       -0.17880398, -0.17625772, -0.17684817,  5.87357131, -0.17880398,
       -0.17958107, -0.18663494, -0.18151105, -0.17977488, -0.17507156,
       -0.18757046, -0.18304223, -0.17996851, -0.18112649, -0.18246935,
       -0.1818949 , -0.18074121, -0.17782843, -0.1818949 , -0.18437293,
       -0.18151105, -0.18380366, -0.17938707, -0.17841432,  5.4976804 ,
       -0.18285144, -0.18285144, -0.17327891, -0.18112649, -0.17821921,
       -0.17821921, -0.18361356, -0.17821921, -0.17899853, -0.17919289,
       -0.17880398, -0.18112649, -0.17841432, -0.18074121, -0.17782843,
       -0.17802392, -0.18456235, -0.18035522, -0.17802392, -0.17

In [13]:
train_sequences = TimeseriesGenerator(X_train_sc, y_train, length=4, batch_size=64)

In [14]:
train_sequences[0]

(array([[[ 0.36023485, -0.32569876, -0.26948036, ..., -0.59244694,
          -0.5021437 ,  1.24704058],
         [ 0.97308538, -0.22467311, -0.26948036, ..., -0.59244694,
          -0.5021437 , -0.80189852],
         [ 0.79283522, -0.32569876, -0.26948036, ..., -0.59244694,
          -0.5021437 ,  1.24704058],
         [ 1.63400262, -0.32569876, -0.26948036, ...,  1.68791487,
          -0.5021437 , -0.80189852]],
 
        [[ 0.97308538, -0.22467311, -0.26948036, ..., -0.59244694,
          -0.5021437 , -0.80189852],
         [ 0.79283522, -0.32569876, -0.26948036, ..., -0.59244694,
          -0.5021437 ,  1.24704058],
         [ 1.63400262, -0.32569876, -0.26948036, ...,  1.68791487,
          -0.5021437 , -0.80189852],
         [ 1.03316877, -0.32569876, -0.26948036, ...,  1.68791487,
          -0.5021437 , -0.80189852]],
 
        [[ 0.79283522, -0.32569876, -0.26948036, ..., -0.59244694,
          -0.5021437 ,  1.24704058],
         [ 1.63400262, -0.32569876, -0.26948036, ...,  1.6

In [15]:
train_sequences[0][0].shape

(64, 4, 80)

In [16]:
batch_x, batch_y = train_sequences[0]

In [17]:
batch_x.shape

(64, 4, 80)

In [18]:
batch_y.shape

(64,)

In [19]:
batch_x[0]

array([[ 0.36023485, -0.32569876, -0.26948036, -0.21607813, -0.44925484,
        -0.25192244,  0.33674899,  0.11657259, -0.38336799, -0.20342918,
        -0.51879127, -0.17684817, -0.18644735, -0.18700964, -0.18794355,
        -0.17860924, -0.17487318, -0.17207467, -0.17684817, -0.17606051,
        -0.18093394, -0.17724085, -0.17625772, -0.18112649, -0.17546773,
        -0.17880398, -0.17625772, -0.17684817, -0.17025417, -0.17880398,
        -0.17958107, -0.18663494,  5.50930658, -0.17977488, -0.17507156,
        -0.18757046, -0.18304223, -0.17996851, -0.18112649, -0.18246935,
        -0.1818949 , -0.18074121, -0.17782843,  5.4976804 , -0.18437293,
        -0.18151105, -0.18380366, -0.17938707, -0.17841432, -0.1818949 ,
        -0.18285144, -0.18285144, -0.17327891, -0.18112649, -0.17821921,
        -0.17821921, -0.18361356, -0.17821921, -0.17899853, -0.17919289,
        -0.17880398, -0.18112649, -0.17841432, -0.18074121, -0.17782843,
        -0.17802392, -0.18456235, -0.18035522, -0.1

In [20]:
features = [col for col in df_with_dummies.columns if col not in ['Name', 'PlayerID', 'Week', 'FantasyPoints']]
pd.DataFrame(X_train_sc, columns=X_train[features].columns).head(10)

Unnamed: 0,Rank,PassingYards,PassingTouchdowns,PassingInterceptions,RushingYards,RushingTouchdowns,Receptions,ReceivingYards,ReceivingTouchdowns,FumblesLost,...,Opponent_SEA,Opponent_SF,Opponent_TB,Opponent_TEN,Opponent_WAS,Position_FB,Position_QB,Position_RB,Position_TE,Position_WR
0,0.360235,-0.325699,-0.26948,-0.216078,-0.449255,-0.251922,0.336749,0.116573,-0.383368,-0.203429,...,-0.17066,-0.176258,-0.179581,-0.180355,-0.176455,-0.177241,-0.363998,-0.592447,-0.502144,1.247041
1,0.973085,-0.224673,-0.26948,-0.216078,-0.449255,-0.251922,-0.946823,-0.801239,-0.383368,-0.203429,...,-0.17066,5.67351,-0.179581,-0.180355,-0.176455,-0.177241,2.747269,-0.592447,-0.502144,-0.801899
2,0.792835,-0.325699,-0.26948,-0.216078,-0.449255,-0.251922,0.336749,-0.199914,-0.383368,-0.203429,...,-0.17066,-0.176258,-0.179581,-0.180355,-0.176455,-0.177241,-0.363998,-0.592447,-0.502144,1.247041
3,1.634003,-0.325699,-0.26948,-0.216078,-0.449255,-0.251922,-0.518966,-0.737941,-0.383368,-0.203429,...,-0.17066,-0.176258,-0.179581,-0.180355,-0.176455,-0.177241,-0.363998,1.687915,-0.502144,-0.801899
4,1.033169,-0.325699,-0.26948,-0.216078,-0.408424,-0.251922,-0.946823,-0.801239,-0.383368,-0.203429,...,-0.17066,-0.176258,-0.179581,-0.180355,-0.176455,-0.177241,-0.363998,1.687915,-0.502144,-0.801899
5,1.417702,-0.325699,-0.26948,-0.216078,-0.367593,-0.251922,-0.946823,-0.801239,-0.383368,-0.203429,...,-0.17066,-0.176258,-0.179581,-0.180355,-0.176455,-0.177241,-0.363998,1.687915,-0.502144,-0.801899
6,-0.625133,-0.325699,-0.26948,-0.216078,-0.449255,-0.251922,0.336749,-0.010022,2.060603,-0.203429,...,-0.17066,-0.176258,-0.179581,-0.180355,-0.176455,-0.177241,-0.363998,-0.592447,1.991462,-0.801899
7,1.033169,-0.325699,-0.26948,-0.216078,-0.449255,-0.251922,-0.946823,-0.801239,-0.383368,-0.203429,...,-0.17066,-0.176258,-0.179581,-0.180355,-0.176455,-0.177241,-0.363998,1.687915,-0.502144,-0.801899
8,-1.670584,2.162058,4.68214,-0.216078,0.081547,-0.251922,-0.946823,-0.801239,-0.383368,-0.203429,...,-0.17066,-0.176258,-0.179581,-0.180355,-0.176455,-0.177241,2.747269,-0.592447,-0.502144,-0.801899
9,-0.156482,-0.325699,-0.26948,-0.216078,-0.449255,-0.251922,0.336749,0.907789,-0.383368,-0.203429,...,-0.17066,-0.176258,-0.179581,5.544613,-0.176455,-0.177241,-0.363998,-0.592447,1.991462,-0.801899


In [21]:
test_sequences = TimeseriesGenerator(X_test_sc, y_test, length=4, batch_size=64)

In [22]:
test_sequences[0][0].shape # first X

(64, 4, 80)

In [23]:
test_sequences[0][1].shape # first y

(64,)

In [24]:
model = Sequential()
model.add(GRU(80, input_shape=(4, 80), return_sequences=True))
model.add(GRU(80))
model.add(Dense(40, activation='relu'))
model.add(Dense(20, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(1))

Instructions for updating:
Colocations handled automatically by placer.


In [25]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_1 (GRU)                  (None, 4, 80)             38640     
_________________________________________________________________
gru_2 (GRU)                  (None, 80)                38640     
_________________________________________________________________
dense_1 (Dense)              (None, 40)                3240      
_________________________________________________________________
dense_2 (Dense)              (None, 20)                820       
_________________________________________________________________
dense_3 (Dense)              (None, 10)                210       
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 11        
Total params: 81,561
Trainable params: 81,561
Non-trainable params: 0
_________________________________________________________________


In [26]:
model.compile(loss='mse', optimizer=Adam(lr=.001), metrics=['mae'])
history = model.fit_generator(train_sequences, epochs = 5, validation_data = test_sequences, verbose = 1)

Instructions for updating:
Use tf.cast instead.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
