In [1]:
import nflgame
import pandas as pd
import numpy as np

# Get Data

In [41]:
# NFL Games for 2016
games = nflgame.games(2016)

In [42]:
players = nflgame.combine_game_stats(games)

In [43]:
# Get the Top 50 players who have the most rushing yards from the 2016 season
top50 = []
for p in players.rushing().sort('rushing_yds').limit(50):
    top50.append(p)

In [142]:
# Build DataFrame - Index for Player name and week #
top50_names = [str(p.name) for p in top50]
wks = range(1,18)
idxs = [top50_names, wks]

idx = pd.MultiIndex.from_product(idxs, names=['name', 'week'])
columns = ['rushing_att', 'rushing_yds', 'rushing_tds', 'fumbles_tot']
df = pd.DataFrame(data=None, index=idx, columns=columns)

In [143]:
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,rushing_att,rushing_yds,rushing_tds,fumbles_tot
name,week,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
E.Elliott,1,,,,
E.Elliott,2,,,,
E.Elliott,3,,,,
E.Elliott,4,,,,
E.Elliott,5,,,,


In [144]:
# Update DF with stats for Top 50 players for all games
for g in games:
    wk = g.schedule['week']
    for p in g.players.rushing():
        
        # Check if top50 player
        if p.name in df.index:
            df.loc[(p.name, wk), 'rushing_att'] = p.rushing_att
            df.loc[(p.name, wk), 'rushing_yds'] = p.rushing_yds
            df.loc[(p.name, wk), 'rushing_tds'] = p.rushing_tds
            df.loc[(p.name, wk), 'fumbles_tot'] = p.fumbles_tot

In [145]:
# Calculate Fantasy Points
df['fantasy_points'] = df['rushing_yds'] * .1 + df['rushing_tds'] * 6 - df['fumbles_tot'] * 2

In [146]:
df.head(17)

Unnamed: 0_level_0,Unnamed: 1_level_0,rushing_att,rushing_yds,rushing_tds,fumbles_tot,fantasy_points
name,week,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
E.Elliott,1,20.0,51.0,1.0,0.0,11.1
E.Elliott,2,21.0,83.0,1.0,2.0,10.3
E.Elliott,3,30.0,140.0,0.0,0.0,14.0
E.Elliott,4,23.0,138.0,1.0,0.0,19.8
E.Elliott,5,15.0,134.0,2.0,0.0,25.4
E.Elliott,6,28.0,157.0,0.0,0.0,15.7
E.Elliott,7,,,,,
E.Elliott,8,22.0,96.0,0.0,0.0,9.6
E.Elliott,9,18.0,92.0,2.0,1.0,19.2
E.Elliott,10,21.0,114.0,2.0,0.0,23.4


# Format Data

In [147]:
# TODO
df = df.fillna(0)
df_pivot = df.unstack(level=-1)
df_pivot

Unnamed: 0_level_0,rushing_att,rushing_att,rushing_att,rushing_att,rushing_att,rushing_att,rushing_att,rushing_att,rushing_att,rushing_att,...,fantasy_points,fantasy_points,fantasy_points,fantasy_points,fantasy_points,fantasy_points,fantasy_points,fantasy_points,fantasy_points,fantasy_points
week,1,2,3,4,5,6,7,8,9,10,...,8,9,10,11,12,13,14,15,16,17
name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
A.Blue,1,5,2,3,6,2,11,9,0,7,...,4.1,0.0,2.3,0.0,0.0,3.8,3.5,0.1,13.3,2.8
A.Rodgers,4,3,5,0,5,3,2,6,6,3,...,6.0,4.3,8.7,1.3,2.6,-0.4,-0.6,1.9,7.3,4.2
B.Powell,4,1,4,4,4,3,4,6,6,4,...,13.6,2.2,3.7,0.0,3.6,0.3,26.5,8.4,6.0,12.2
C.Anderson,20,20,14,19,11,10,16,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
C.Hyde,23,14,21,15,22,14,0,0,0,13,...,0.0,0.0,1.4,6.6,6.5,9.2,19.3,5.1,-0.2,0.0
C.Ivory,0,0,12,8,0,11,5,4,18,9,...,-1.4,8.7,3.1,1.9,10.4,0.0,0.0,4.4,8.5,0.0
C.Kaepernick,0,0,0,0,0,8,9,0,5,10,...,0.0,0.3,11.5,-0.8,11.3,0.0,2.3,2.1,7.5,-2.4
C.Michael,15,10,20,18,0,18,16,10,5,5,...,10.0,6.1,2.2,0.0,0.4,1.9,3.6,10.5,0.4,0.6
D.Booker,3,9,4,7,6,5,17,19,10,24,...,9.4,2.2,7.6,0.0,7.9,9.5,0.1,1.7,0.7,11.7
D.Freeman,11,17,14,13,23,12,15,11,17,12,...,9.5,7.7,4.9,0.0,18.0,17.6,0.6,29.9,5.3,15.6


In [148]:
# Build Dataframe sorted by Week
stat_categories = ['rushing_att', 'rushing_yds', 'rushing_tds', 'fumbles_tot', 'fantasy_points']
wks = range(1,18)
columns = ['{0}_{1}'.format(wk, c) for wk in wks for c in stat_categories]
df_ = pd.DataFrame(0, index=df_pivot.index, columns=columns)

In [149]:
# Sort Data from df_pivot by week
for wk in wks:
    for c in stat_categories:
        column_name = '{0}_{1}'.format(wk, c)
        df_[column_name] = df_pivot[c][wk]
df_

Unnamed: 0_level_0,1_rushing_att,1_rushing_yds,1_rushing_tds,1_fumbles_tot,1_fantasy_points,2_rushing_att,2_rushing_yds,2_rushing_tds,2_fumbles_tot,2_fantasy_points,...,16_rushing_att,16_rushing_yds,16_rushing_tds,16_fumbles_tot,16_fantasy_points,17_rushing_att,17_rushing_yds,17_rushing_tds,17_fumbles_tot,17_fantasy_points
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A.Blue,1,4,0,0,0.4,5,11,0,0,1.1,...,21,73,1,0,13.3,11,28,0,0,2.8
A.Rodgers,4,16,1,0,7.6,3,29,1,3,2.9,...,2,13,1,0,7.3,10,42,0,0,4.2
B.Powell,4,41,0,0,4.1,1,2,0,0,0.2,...,15,60,0,0,6.0,22,122,0,0,12.2
C.Anderson,20,92,1,0,15.2,20,74,1,0,13.4,...,0,0,0,0,0.0,0,0,0,0,0.0
C.Hyde,23,88,2,0,20.8,14,34,0,1,1.4,...,13,38,0,2,-0.2,0,0,0,0,0.0
C.Ivory,0,0,0,0,0.0,0,0,0,0,0.0,...,14,45,1,1,8.5,0,0,0,0,0.0
C.Kaepernick,0,0,0,0,0.0,0,0,0,0,0.0,...,6,15,1,0,7.5,5,16,0,2,-2.4
C.Michael,15,66,0,0,6.6,10,60,0,1,4.0,...,4,4,0,0,0.4,3,6,0,0,0.6
D.Booker,3,8,0,1,-1.2,9,46,0,0,4.6,...,5,27,0,1,0.7,14,57,1,0,11.7
D.Freeman,11,20,0,0,2.0,17,93,0,0,9.3,...,13,53,0,0,5.3,12,96,1,0,15.6


# Train Test Split

In [150]:
def create_dataset(dataset, games_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - games_back - 1):
        a = dataset[i:(i + games_back), :]
        dataX.append(a)
        
        # Last Game's fantasy points
        dataY.append(dataset[i + games_back, -1])
    return np.array(dataX), np.array(dataY)

In [169]:
# Build Dataset
X = np.array([])
Y = np.array([])
games_back = 4
for player in set(df.index.get_level_values(0)):
    data = df.loc[player].values
    X_, Y_ = create_dataset(data, games_back=games_back)
    if len(X) == 0 and len(Y) == 0:
        X = X_
        Y = Y_
    else:
        X = np.append(X, X_, axis=0)
        Y = np.append(Y, Y_, axis=0)
print(X)

[[[ 15.   64.    0.    0.    6.4]
  [  5.    9.    0.    0.    0.9]
  [  0.    0.    0.    0.    0. ]
  [  0.    0.    0.    0.    0. ]]

 [[  5.    9.    0.    0.    0.9]
  [  0.    0.    0.    0.    0. ]
  [  0.    0.    0.    0.    0. ]
  [  0.    0.    0.    0.    0. ]]

 [[  0.    0.    0.    0.    0. ]
  [  0.    0.    0.    0.    0. ]
  [  0.    0.    0.    0.    0. ]
  [ 19.   85.    2.    0.   20.5]]

 ..., 
 [[ 19.   79.    0.    0.    7.9]
  [ 16.   77.    0.    0.    7.7]
  [ 18.   45.    1.    0.   10.5]
  [ 12.   61.    0.    0.    6.1]]

 [[ 16.   77.    0.    0.    7.7]
  [ 18.   45.    1.    0.   10.5]
  [ 12.   61.    0.    0.    6.1]
  [ 20.   48.    0.    1.    2.8]]

 [[ 18.   45.    1.    0.   10.5]
  [ 12.   61.    0.    0.    6.1]
  [ 20.   48.    0.    1.    2.8]
  [ 19.   51.    0.    1.    3.1]]]


In [171]:
# Train / Test Split
split = .67
train_sizeX = int(len(X) * split)
train_sizeY = int(len(Y) * split)
test_sizeX = int(len(X) * (1 - split))
test_sizeY = int(len(Y) * (1 - split))

trainX = X[:train_sizeX]
trainY = Y[:train_sizeY]
testX = X[train_sizeX:]
testY = Y[train_sizeY:]

# Build Model

In [172]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

column_nums = len(df.columns)
model = Sequential()
model.add(LSTM(4, input_dim=column_nums))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

  import sys
  import sys


# Train Model

In [156]:
model.fit(trainX, trainY, nb_epoch=10, batch_size=1, verbose=2)



ValueError: Error when checking input: expected lstm_1_input to have 3 dimensions, but got array with shape (8040, 1)