# Dense Neural Network Framework to Predict NFL Down Set Outcome Distribution


<a name='1'></a>
## 1 - Load Packages

In [189]:
import numpy as np
import pandas as pd
from datetime import datetime
import os
from importlib import reload
import dnn_utils

In [442]:
# Reload utils if necessary
dnn_utils=reload(dnn_utils)
from dnn_utils import *

In [191]:
# Set random seed for reproducibility
np.random.seed(1)

## 2 - Data Preparation Part 1
Initial data cleaning, labelling, and feature selection.

In [192]:
# Load Dataset
# Source - https://www.kaggle.com/datasets/maxhorowitz/nflplaybyplay2009to2016?resource=download

dataset=pd.read_csv("NFL Play by Play 2009-2018 (v5).csv")

  dataset=pd.read_csv("NFL Play by Play 2009-2018 (v5).csv")


In [193]:
# Observe First Records
dataset.head(5)

Unnamed: 0,play_id,game_id,home_team,away_team,posteam,posteam_type,defteam,side_of_field,yardline_100,game_date,...,penalty_player_id,penalty_player_name,penalty_yards,replay_or_challenge,replay_or_challenge_result,penalty_type,defensive_two_point_attempt,defensive_two_point_conv,defensive_extra_point_attempt,defensive_extra_point_conv
0,46,2009091000,PIT,TEN,PIT,home,TEN,TEN,30.0,2009-09-10,...,,,,0,,,0.0,0.0,0.0,0.0
1,68,2009091000,PIT,TEN,PIT,home,TEN,PIT,58.0,2009-09-10,...,,,,0,,,0.0,0.0,0.0,0.0
2,92,2009091000,PIT,TEN,PIT,home,TEN,PIT,53.0,2009-09-10,...,,,,0,,,0.0,0.0,0.0,0.0
3,113,2009091000,PIT,TEN,PIT,home,TEN,PIT,56.0,2009-09-10,...,,,,0,,,0.0,0.0,0.0,0.0
4,139,2009091000,PIT,TEN,PIT,home,TEN,PIT,56.0,2009-09-10,...,,,,0,,,0.0,0.0,0.0,0.0


In [194]:
# List Columns
list(dataset.columns)[0:20]

['play_id',
 'game_id',
 'home_team',
 'away_team',
 'posteam',
 'posteam_type',
 'defteam',
 'side_of_field',
 'yardline_100',
 'game_date',
 'quarter_seconds_remaining',
 'half_seconds_remaining',
 'game_seconds_remaining',
 'game_half',
 'quarter_end',
 'drive',
 'sp',
 'qtr',
 'down',
 'goal_to_go']

In [195]:
# Remove Duplicates and Erroneous/Irrelevant Records

dataset=dataset[dataset.duplicated()==False]
dataset=dataset[dataset['down'].isnull()==False]
dataset=dataset[dataset['yardline_100'].isnull()==False]
len(dataset)

378969

In [196]:
# Create yardline field that adjusts yardline_100 into Buckets of 10
yardline_buckets=10
dataset['yardline']=np.ceil(dataset['yardline_100']/yardline_buckets)*yardline_buckets
dataset['yardline'].value_counts().sort_index()

10.0     25660
20.0     27679
30.0     31166
40.0     36085
50.0     42252
60.0     50187
70.0     56344
80.0     70081
90.0     29672
100.0     9843
Name: yardline, dtype: int64

In [197]:
# Create yards to go field with a final bucket of 11+
dataset['ydstogo_archive']=dataset['ydstogo'].copy()
yards_to_go_max=11
dataset['ydstogo']=np.where(dataset['ydstogo']>yards_to_go_max,yards_to_go_max,dataset['ydstogo'])
dataset['ydstogo'].value_counts().sort_index()

1      20059
2      14482
3      15325
4      16560
5      19495
6      19006
7      18986
8      18148
9      15822
10    173227
11     47859
Name: ydstogo, dtype: int64

In [198]:
# Convert yards_gained to Int64
dataset['yards_gained_archive']=dataset['yards_gained'].copy()
dataset['yards_gained']=dataset['yards_gained'].astype('Int64')

In [199]:
# Because of the lack on monotonicity observed in some cases with play_id >
# in the loop, I will rebuild the play_id to index from 1:p in a game based on the dataset order
dataset['play_id_archive']=dataset['play_id'].copy()

In [200]:
# Run a loop through all games, drives, and plays that adds a set of downs (down set) each time there is a new 1st down
# Because finding the down set requires cumulative information from the drive, the following loop likely can't be vectorized
dataset['down']=dataset['down'].apply(int)
down_set_list=[]
down_set_id_list=[]
down_set_id=0
play_in_game_list=[]
for g, game_id in enumerate(dataset['game_id'].unique(), start=1):
    game_dataset=dataset[dataset['game_id']==game_id]
    play_in_game=0
    for d, drive in enumerate(game_dataset['drive'].unique(), start=1):
        drive_dataset=game_dataset[game_dataset['drive']==drive]
        for p, play_id in enumerate(drive_dataset['play_id'].unique(), start=1):
            play_in_game+=1
            if p==1: 
                down_set=1
                down_set_id+=1
            elif drive_dataset.iloc[p-1,:]['down']==1:
                down_set+=1
                down_set_id+=1
            down_set_list.append(down_set)
            down_set_id_list.append(down_set_id)
            play_in_game_list.append(play_in_game)
dataset['down_set']=down_set_list
dataset['down_set_id']=down_set_id_list
dataset['play_id']=play_in_game_list

In [201]:
# Observe Result
dataset.loc[dataset['game_id']==2009091000,['game_id','drive','down','down_set','posteam']].head()

Unnamed: 0,game_id,drive,down,down_set,posteam
1,2009091000,1,1,1,PIT
2,2009091000,1,2,1,PIT
3,2009091000,1,3,1,PIT
4,2009091000,1,4,1,PIT
5,2009091000,2,1,1,TEN


In [202]:
# Build requirements to find absorption states

# Create identifier for the last play in a down set
dataset['down_set_max_play']=dataset.groupby(['game_id','drive','down_set'])['play_id'].transform(max)

# Create identifier for the last play in a drive
dataset['drive_max_play']=dataset.groupby(['game_id','drive'])['play_id'].transform(max)

# Create identifier for the last down in a set
dataset['down_set_max_down']=dataset.groupby(['game_id','drive'])['down'].transform(max)

# Create identifier for the last play in a game
dataset['game_half_max_play']=dataset.groupby(['game_id','game_half'])['play_id'].transform(max)

# Find yardline of next down_set
# dataset=dataset.drop(['yardline_next_down_set'],axis=1)
first_down=dataset.loc[dataset['down']==1,['game_id','drive','down_set','yardline']].rename(columns={'yardline': 'yardline_next_down_set'})
first_down['down_set']=first_down['down_set']-1
dataset=dataset.merge(first_down,how='left',on=['game_id','drive','down_set'])

# Observe Result
dataset.loc[dataset['game_id']==2009091000,['game_id','drive','down','down_set','play_id','yardline','posteam','down_set_max_play','drive_max_play','down_set_max_down','game_half_max_play','yardline_next_down_set']].head(20)

Unnamed: 0,game_id,drive,down,down_set,play_id,yardline,posteam,down_set_max_play,drive_max_play,down_set_max_down,game_half_max_play,yardline_next_down_set
0,2009091000,1,1,1,1,60.0,PIT,4,4,4,78,
1,2009091000,1,2,1,2,60.0,PIT,4,4,4,78,
2,2009091000,1,3,1,3,60.0,PIT,4,4,4,78,
3,2009091000,1,4,1,4,60.0,PIT,4,4,4,78,
4,2009091000,2,1,1,5,100.0,TEN,8,8,4,78,
5,2009091000,2,2,1,6,100.0,TEN,8,8,4,78,
6,2009091000,2,3,1,7,100.0,TEN,8,8,4,78,
7,2009091000,2,4,1,8,100.0,TEN,8,8,4,78,
8,2009091000,3,1,1,9,50.0,PIT,10,14,4,78,30.0
9,2009091000,3,2,1,10,40.0,PIT,10,14,4,78,30.0


In [203]:
# For cases with repeat downs (i.e. penalty), let's select the record with max_play_id to ensure a down set has a max of 4 plays
print(len(dataset))
dataset=dataset.loc[dataset['play_id']==dataset.groupby(['down_set_id','down'])['play_id'].transform(max),]
print(len(dataset))

378969
368753


In [204]:
# Make copy of dataset to find absorption states, and later join back to the main dataset
dataset_copy=dataset[dataset['play_id']==dataset['down_set_max_play']]

In [205]:
# Find the absorption state for the down set, the final state that ends the team's position or starts a new set of downs

# 19 different options
# 10 options where the drive continues
# One for each bucketed yardline position
# 9 options where the drive ends
# touchdown, field goal, safety, missed field goal, fumble, interception, turnover on downs, punt, end of half or game.

dataset_copy['absorption_state']=np.where(dataset_copy['play_id']!=dataset_copy['drive_max_play'],dataset_copy['yardline_next_down_set'].apply(str),
                                              np.where(dataset_copy['touchdown']==1,'touchdown',np.where(dataset_copy['field_goal_result']=='made','field_goal',np.where(dataset_copy['safety']==1,'safety',np.where(dataset_copy['field_goal_result'].isin(['missed','blocked']),'missed_field_goal',np.where(dataset_copy['fumble']==1,'fumble',np.where(dataset_copy['interception']==1,'interception',np.where(dataset_copy['kick_distance'].isnull()==False,'punt',np.where(dataset_copy['play_id']==dataset_copy['game_half_max_play'],'end_of_half','turnover')))))))))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset_copy['absorption_state']=np.where(dataset_copy['play_id']!=dataset_copy['drive_max_play'],dataset_copy['yardline_next_down_set'].apply(str),


In [206]:
# Observe down set absorption states

print(dataset_copy['absorption_state'].isnull().value_counts())
print(len(dataset_copy['absorption_state'].unique()))
dataset_copy['absorption_state'].value_counts()

False    152296
Name: absorption_state, dtype: int64
19


punt                 21076
70.0                 14986
60.0                 14721
50.0                 14030
touchdown            12629
40.0                 12624
30.0                 10950
20.0                  9551
10.0                  8586
field_goal            8183
80.0                  5528
interception          4043
end_of_half           3868
turnover              3812
fumble                3399
90.0                  2130
missed_field_goal     1567
100.0                  452
safety                 161
Name: absorption_state, dtype: int64

In [207]:
# Observe results for a game

# dataset_copy.loc[(last_play_in_down_set['absorption_state']=='nan'),['game_id','drive','down','down_set','play_id','down_set_max_play','drive_max_play','game_half_max_play','yardline','posteam','yardline_next_down_set','touchdown','field_goal_result','safety','fumble','interception','kick_distance','half_seconds_remaining','game_seconds_remaining','ydstogo_archive','yards_gained','penalty','absorption_state']].head(20)
dataset_copy.loc[dataset_copy['game_id']==2009091000,['game_id','drive','down','down_set','play_id','down_set_max_play','drive_max_play','game_half_max_play','yardline','posteam','yardline_next_down_set','touchdown','field_goal_result','safety','fumble','interception','kick_distance','half_seconds_remaining','game_seconds_remaining','ydstogo_archive','yards_gained','penalty','absorption_state']].head(10)

Unnamed: 0,game_id,drive,down,down_set,play_id,down_set_max_play,drive_max_play,game_half_max_play,yardline,posteam,...,safety,fumble,interception,kick_distance,half_seconds_remaining,game_seconds_remaining,ydstogo_archive,yards_gained,penalty,absorption_state
3,2009091000,1,4,1,4,4,4,78,60.0,PIT,...,0.0,0.0,0.0,54.0,1707.0,3507.0,8,0,0.0,punt
7,2009091000,2,4,1,8,8,8,78,100.0,TEN,...,0.0,0.0,0.0,50.0,1594.0,3394.0,8,0,0.0,punt
9,2009091000,3,2,1,10,10,14,78,40.0,PIT,...,0.0,0.0,0.0,,1548.0,3348.0,7,10,0.0,30.0
13,2009091000,3,4,2,14,14,14,78,50.0,PIT,...,0.0,0.0,0.0,30.0,1405.0,3205.0,21,0,0.0,punt
14,2009091000,4,1,1,15,15,20,78,90.0,TEN,...,0.0,0.0,0.0,,1400.0,3200.0,10,32,1.0,50.0
15,2009091000,4,1,2,16,16,20,78,50.0,TEN,...,0.0,0.0,0.0,,1377.0,3177.0,10,20,0.0,30.0
19,2009091000,4,4,3,20,20,20,78,20.0,TEN,...,0.0,0.0,0.0,37.0,1308.0,3108.0,7,0,0.0,missed_field_goal
23,2009091000,5,4,1,24,24,24,78,80.0,PIT,...,0.0,0.0,0.0,53.0,1202.0,3002.0,16,0,0.0,punt
24,2009091000,6,1,1,25,25,31,78,80.0,TEN,...,0.0,0.0,0.0,,1189.0,2989.0,10,10,0.0,70.0
25,2009091000,6,1,2,26,26,31,78,70.0,TEN,...,0.0,0.0,0.0,,1167.0,2967.0,10,0,1.0,70.0


In [208]:
# Join absorption_state to the main dataset

dataset_len_before=len(dataset)
dataset=dataset.merge(dataset_copy[dataset_copy['play_id']==dataset_copy['down_set_max_play']].loc[:,['game_id','drive','down_set','absorption_state']],how='left',on=['game_id','drive','down_set'])
assert len(dataset) == dataset_len_before
dataset.head()

Unnamed: 0,play_id,game_id,home_team,away_team,posteam,posteam_type,defteam,side_of_field,yardline_100,game_date,...,yards_gained_archive,play_id_archive,down_set,down_set_id,down_set_max_play,drive_max_play,down_set_max_down,game_half_max_play,yardline_next_down_set,absorption_state
0,1,2009091000,PIT,TEN,PIT,home,TEN,PIT,58.0,2009-09-10,...,5.0,68,1,1,4,4,4,78,,punt
1,2,2009091000,PIT,TEN,PIT,home,TEN,PIT,53.0,2009-09-10,...,-3.0,92,1,1,4,4,4,78,,punt
2,3,2009091000,PIT,TEN,PIT,home,TEN,PIT,56.0,2009-09-10,...,0.0,113,1,1,4,4,4,78,,punt
3,4,2009091000,PIT,TEN,PIT,home,TEN,PIT,56.0,2009-09-10,...,0.0,139,1,1,4,4,4,78,,punt
4,5,2009091000,PIT,TEN,TEN,away,PIT,TEN,98.0,2009-09-10,...,0.0,162,1,2,8,8,4,78,,punt


In [209]:
# Select only the required columns

dataset=dataset.loc[:,['game_id','game_date','drive','down_set','down_set_id','play_id','down','down_set_max_down','yardline','ydstogo','absorption_state','half_seconds_remaining']]

dataset.head()

Unnamed: 0,game_id,game_date,drive,down_set,down_set_id,play_id,down,down_set_max_down,yardline,ydstogo,absorption_state,half_seconds_remaining
0,2009091000,2009-09-10,1,1,1,1,1,4,60.0,10,punt,1793.0
1,2009091000,2009-09-10,1,1,1,2,2,4,60.0,5,punt,1756.0
2,2009091000,2009-09-10,1,1,1,3,3,4,60.0,8,punt,1715.0
3,2009091000,2009-09-10,1,1,1,4,4,4,60.0,8,punt,1707.0
4,2009091000,2009-09-10,2,1,2,5,1,4,100.0,10,punt,1696.0


In [210]:
# Save copy of dataset after part 1 data preparation
dataset_dataprep_part1=dataset.copy()

In [211]:
# dataset=dataset_dataprep_part1.copy()

# 3 - Data Preparation Part 2
Preparing data for the neural network

In [212]:
# Scale half_seconds remaining over total seconds in a half of 1800
dataset['half_seconds_remaining']=dataset['half_seconds_remaining']/1800
print(dataset['half_seconds_remaining'].max())
print(dataset['half_seconds_remaining'].min())
dataset['half_seconds_remaining'].head(10)

1.0
0.0


0    0.996111
1    0.975556
2    0.952778
3    0.948333
4    0.942222
5    0.922222
6    0.906111
7    0.885556
8    0.880000
9    0.860000
Name: half_seconds_remaining, dtype: float64

In [213]:
# Build a matrix with the intersection of yardline and ydstogo buckets
# In the future, look to vectorize the loop

innerstates = np.zeros([10,11,len(dataset)])

for i in range(innerstates.shape[2]):
    innerstates[(dataset['yardline']/10).astype(int)[i]-1,dataset['ydstogo'][i]-1,i]=1

In [214]:
# Observe results
innerstates[:,:,0]

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [215]:
# Unroll the matrix horizontally
innerstates_reshaped=innerstates.reshape(innerstates.shape[0]*innerstates.shape[1],innerstates.shape[2])
innerstates_reshaped.shape

(110, 368753)

In [216]:
# Verify horizontal unrolling & observe results
i=0
yardline=(dataset['yardline']/10).astype(int)[i]
ydstogo=dataset['ydstogo'][i]
# print(yardline)
# print(ydstogo)
print((yardline-1)*11+ydstogo-1)
assert ((yardline-1)*11+ydstogo-1) == list(innerstates_reshaped[:,i]).index(1)
innerstates_reshaped[:,0]

64


array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0.])

In [217]:
# Build absorption states matrix

print(dataset['absorption_state'].unique())

absorption_states=['10.0', '20.0', '30.0', '40.0', '50.0', '60.0', '70.0', '80.0', '90.0', '100.0',
 'punt', 'missed_field_goal', 'interception', 'touchdown', 'fumble', 'field_goal', 'end_of_half', 'turnover', 'safety']

print(absorption_states)

absorption_states_reshaped = np.zeros([len(absorption_states),len(dataset)])

for i in range(absorption_states_reshaped.shape[1]):
    absorption_states_reshaped[absorption_states.index(dataset['absorption_state'][i]),i]=1

absorption_states_reshaped

['punt' '30.0' '50.0' 'missed_field_goal' '70.0' 'interception' '40.0'
 '80.0' '20.0' 'touchdown' '60.0' 'fumble' 'field_goal' 'end_of_half'
 '10.0' 'turnover' '90.0' '100.0' 'safety']
['10.0', '20.0', '30.0', '40.0', '50.0', '60.0', '70.0', '80.0', '90.0', '100.0', 'punt', 'missed_field_goal', 'interception', 'touchdown', 'fumble', 'field_goal', 'end_of_half', 'turnover', 'safety']


array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 1., 1., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [218]:
# Observe results
print(absorption_states.index(dataset['absorption_state'][i]))
list(absorption_states_reshaped[:,i])

16


[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 0.0,
 0.0]

In [219]:
# Partition data into training, hypertuning, and testing
# I will use 2009-2016, 2017, and 2018, for each respectively
dataset['partition']=np.where(pd.DatetimeIndex(dataset['game_date']).year==2018,'testing',np.where(pd.DatetimeIndex(dataset['game_date']).year==2017,'hypertuning','training'))
# Games by partition
print(dataset.groupby(['partition']).game_id.nunique().sort_values(ascending=False))
print(dataset.groupby(['partition']).down_set_id.nunique().sort_values(ascending=False))

partition
training       2030
hypertuning     272
testing         224
Name: game_id, dtype: int64
partition
training       122476
hypertuning     16158
testing         13662
Name: down_set_id, dtype: int64


In [220]:
# Combine data
# identifiers (3) + innerstates (110) + features (1) + absorptionstates (19) = 333
dataset_reshaped=pd.concat([dataset.loc[:,['down_set_id','down','down_set_max_down','partition']],pd.DataFrame(innerstates_reshaped.T),dataset.loc[:,['half_seconds_remaining']],pd.DataFrame(absorption_states_reshaped.T)],axis=1)
print(dataset_reshaped.shape)
dataset_reshaped.head()

(368753, 134)


Unnamed: 0,down_set_id,down,down_set_max_down,partition,0,1,2,3,4,5,...,9,10,11,12,13,14,15,16,17,18
0,1,1,4,training,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,2,4,training,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,3,4,training,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1,4,4,training,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2,1,4,training,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [221]:
dataset[44:47]

Unnamed: 0,game_id,game_date,drive,down_set,down_set_id,play_id,down,down_set_max_down,yardline,ydstogo,absorption_state,half_seconds_remaining,partition
44,2009091000,2009-09-10,9,2,17,47,2,4,90.0,11,70.0,0.323889,training
45,2009091000,2009-09-10,9,3,18,48,1,4,70.0,10,punt,0.299444,training
46,2009091000,2009-09-10,9,3,18,49,2,4,70.0,9,punt,0.276667,training


In [222]:
dataset_reshaped[44:47]

Unnamed: 0,down_set_id,down,down_set_max_down,partition,0,1,2,3,4,5,...,9,10,11,12,13,14,15,16,17,18
44,17,2,4,training,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
45,18,1,4,training,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
46,18,2,4,training,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [225]:
# Create a separate copy of data that has the next state as y
dataset_reshaped_copy=dataset_reshaped.copy()

dataset_reshaped_next_state=pd.concat([dataset.loc[:,['down_set_id','down']],pd.DataFrame(innerstates_reshaped.T),dataset.loc[:,['half_seconds_remaining']],pd.DataFrame(absorption_states_reshaped.T)],axis=1)
dataset_reshaped_next_state.eval("down = down - 1", inplace = True)

dataset_len_before=len(dataset_reshaped_copy)

dataset_reshaped_copy=dataset_reshaped_copy.merge(dataset_reshaped_next_state,on=['down_set_id','down'],how='left')

assert len(dataset_reshaped_copy) == dataset_len_before

# Fill NAs with zeros
dataset_reshaped_copy=dataset_reshaped_copy.fillna(0)

# Adjust absorption state to 0s if not the max down set
collen=dataset_reshaped_copy.shape[1]
dataset_reshaped_copy.loc[dataset_reshaped_copy['down']!=dataset_reshaped_copy['down_set_max_down'],dataset_reshaped_copy.columns.values[(collen-130-19):(collen-130)]]=0

# Copy the absorption state 
dataset_reshaped_copy.iloc[:,(collen-19):dataset_reshaped_copy.shape[1]]=dataset_reshaped_copy.iloc[:,(collen-130-19):(collen-130)]

# Drop X cols
dataset_reshaped_copy=dataset_reshaped_copy.drop(dataset_reshaped_copy.columns.values[(collen-260):(collen-130)],axis=1)

dataset_reshaped_copy.head()

Unnamed: 0,down_set_id,down,down_set_max_down,partition,0_y,1_y,2_y,3_y,4_y,5_y,...,9_y,10_y,11_y,12_y,13_y,14_y,15_y,16_y,17_y,18_y
0,1,1,4,training,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,2,4,training,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,3,4,training,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1,4,4,training,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2,1,4,training,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [226]:
# Drop down_set_max_down column
dataset_reshaped=dataset_reshaped.drop(['down_set_max_down'],axis=1)
dataset_reshaped_copy=dataset_reshaped_copy.drop(['down_set_max_down'],axis=1)

In [227]:
# Separate each partition
training_withid=dataset_reshaped[dataset_reshaped['partition']=='training'].drop(['partition'],axis=1)
hypertuning_withid=dataset_reshaped[dataset_reshaped['partition']=='hypertuning'].drop(['partition'],axis=1)
testing_withid=dataset_reshaped[dataset_reshaped['partition']=='testing'].drop(['partition'],axis=1)

training=training_withid.drop(['down_set_id'],axis=1)
hypertuning=hypertuning_withid.drop(['down_set_id'],axis=1)
testing=testing_withid.drop(['down_set_id'],axis=1)

training_x=np.array(training.iloc[:,0:(len(training.columns))]).T
hypertuning_x=np.array(hypertuning.iloc[:,0:(len(hypertuning.columns))]).T
testing_x=np.array(testing.iloc[:,0:(len(testing.columns))]).T

training_y=np.array(training.iloc[:,(len(training.columns)-19):len(training.columns)]).T
hypertuning_y=np.array(hypertuning.iloc[:,(len(hypertuning.columns)-19):len(hypertuning.columns)]).T
testing_y=np.array(testing.iloc[:,(len(testing.columns)-19):len(testing.columns)]).T

training_y_next=np.array(dataset_reshaped_copy[dataset_reshaped_copy['partition']=='training'].drop(['down','down_set_id','partition'],axis=1)).T
hypertuning_y_next=np.array(dataset_reshaped_copy[dataset_reshaped_copy['partition']=='hypertuning'].drop(['down','down_set_id','partition'],axis=1)).T
testing_y_next=np.array(dataset_reshaped_copy[dataset_reshaped_copy['partition']=='testing'].drop(['down','down_set_id','partition'],axis=1)).T

In [228]:
# Zero out absorption states in X
# They will be left in as zeroes to simplify teacher-forcing where the shape of X will need to match the layer's node shape

print(training_x[(len(training_x)-19):len(training_x),].sum())

training_x[(len(training_x)-19):len(training_x),]=0
hypertuning_x[(len(hypertuning_x)-19):len(hypertuning_x),]=0
testing_x[(len(testing_x)-19):len(testing_x),]=0

print(training_x[(len(training_x)-19):len(training_x),].sum())

296988.0
0.0


In [229]:
# Validate y_next
i=3
print(training_y[:,i])
print(training_y_next[:,i])

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]


In [230]:
# Observe training records by down set
print(dataset.groupby(['down']).down_set_id.nunique().sort_values(ascending=False))

down
1    152294
2    108732
3     69713
4     38014
Name: down_set_id, dtype: int64


In [231]:
# Confirm shapes
# shape = (n_x/n_y, m training examples)
print(training_x.shape)
print(training_y.shape)
print(training_y_next.shape)

(131, 296988)
(19, 296988)
(130, 296988)


In [232]:
# Can get data for a down as follows
down=4
print(training_x[:,training_x[0,:]==down][1:,:].shape)
training_x[:,training_x[0,:]==down][1:,:]

(130, 30668)


array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

# 4 - Model Training

In [233]:
# Define function to save params locally

def save_params(parameters,path = 'modelparams'):
    time=str(datetime.now())[0:10]+'--'+str(datetime.now())[11:13]+'-'+str(datetime.now())[14:16]
    filename = f"""params-{time}.npy"""
    np.save(os.path.join(path,filename),  parameters)

In [234]:
def load_params(filename, path = "modelparams"):
    parameters=np.load(os.path.join(path,filename),allow_pickle=True)
    return np.array(parameters, ndmin=1)[0]

In [235]:
def load_last_params(path = "modelparams"):
    for i, filename in enumerate(reversed(os.listdir(path))):
        if i==0: parameters=np.load(os.path.join(path,filename),allow_pickle=True)
        return np.array(parameters, ndmin=1)[0]

In [436]:
# Define training function

def L_layer_model(X, Y, layers_dims, parameters, learning_rate = 0.001, batch_size = 128, num_iterations = 3000, down = 4, print_cost = False, inner_layer_activation = 'sigmoid', last_layer_activation = 'softmax', single_layer_training=False):
    """
    Implements a L-layer neural network: [LINEAR->RELU]*(L-1)->LINEAR->last_layer_activation.
    
    Arguments:
    X -- input data, of shape (n_x, number of examples)
    Y -- true "label" vector of absorption states (contains 1 for the actual absorption state and 0 otherwise), of shape (19, number of examples)
    layers_dims -- list containing the input size and each layer size, of length (number of layers + 1).
    learning_rate -- learning rate of the gradient descent update rule
    num_iterations -- number of iterations of the optimization loop
    print_cost -- if True, it prints the cost every 100 steps
    
    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    
    # Get Layer Count
    
    if not single_layer_training:
        L = len(parameters) // 2                  # number of layers in the neural network
    else:
        L = down
    
    # Subset full down data to only the specified down
    
    matching_down = X[0,:]==down
        
    X = X[:,matching_down][1:,:]
    Y = Y[:,matching_down]

    np.random.seed(1)
    costs = []                         # keep track of cost
    
    # Parameters initialization.
    if parameters is None:
        parameters = initialize_parameters_deep(layers_dims)
        
    # Define mini batch function
        
    def random_mini_batches(X,Y,minibatch_size):

        m = Y.shape[1]            # number of examples

        # Lets shuffle X and Y
        permutation = list(np.random.permutation(m))            # shuffled index of examples
        shuffled_X = X[:, permutation]
        shuffled_Y = Y[:, permutation]

        minibatches = []                                        # we will append all minibatch_Xs and minibatch_Ys to this minibatch list 
        number_of_minibatches = int(m/minibatch_size)           # number of mini batches 

        for k in range(number_of_minibatches):
            minibatch_X = shuffled_X[:,k*minibatch_size: (k+1)*minibatch_size ]
            minibatch_Y = shuffled_Y[:,k*minibatch_size: (k+1)*minibatch_size ]
            minibatch_pair = (minibatch_X , minibatch_Y)                        #tuple of minibatch_X and miinibatch_Y
            minibatches.append(minibatch_pair)
        if m%minibatch_size != 0 :
            last_minibatch_X = shuffled_X[:,(k+1)*minibatch_size: m ]
            last_minibatch_Y = shuffled_Y[:,(k+1)*minibatch_size: m ]
            last_minibatch_pair = (last_minibatch_X , last_minibatch_Y)
            minibatches.append(last_minibatch_pair)
        return minibatches
        
    # Loop (gradient descent)
    for i in range(0, num_iterations):
        
        batches = random_mini_batches(X, Y, batch_size)
        
        for i, batch in enumerate(batches):
                    
            X_b, Y_b = batch

            # Forward propagation: [LINEAR -> RELU]*(L-1) -> LINEAR -> last_layer_activation.
            AL, caches = L_model_forward(X_b, parameters, inner_layer_activation, last_layer_activation, down, single_layer_training)

            # Return results for debugging
    #         return AL, Y, caches

            # Compute cost.
            cost = compute_cost(AL, Y_b, last_layer_activation)
            
            # Backward propagation.
            grads = L_model_backward(AL, Y_b, caches, inner_layer_activation, last_layer_activation, L, down, single_layer_training)

            # Update parameters.
            parameters = update_parameters(parameters, grads, learning_rate, down, single_layer_training)

            # Print the cost every 100 iterations
            if print_cost and (i % 100 == 0 or i == num_iterations - 1):
                print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))
            if i % 100 == 0 or i == num_iterations:
                costs.append(cost)
    
    return parameters, costs

In [237]:
layers_dims=(130,130,130,130,19)

In [352]:
parameters=initialize_parameters_deep(layers_dims)

In [415]:
parameters=load_last_params()

In [334]:
#Test Single Run
down=4
parameters_test, costs = L_layer_model(training_x, training_y, layers_dims, parameters, learning_rate = 0.001, batch_size = 128, num_iterations = 100, down = down, print_cost = False, last_layer_activation = 'softmax')

print("Cost after first iteration: " + str(costs[0]))

Cost after first iteration: -1.2347131134383327


In [435]:
# Train on down 4 data with 1 layer
for global_its in range(10):
    for d in reversed(range(3,4)):
        down = d + 1
        print(f"""initiating set of iterations for down {down}""")
        parameters, costs = L_layer_model(training_x, training_y, layers_dims, parameters, learning_rate = 0.001, batch_size = 128, num_iterations = 1000, down = down, print_cost = False, last_layer_activation = 'softmax')
        print("Cost after first iteration: " + str(costs[0]))
    save_params(parameters)

initiating set of iterations for down 4
Cost after first iteration: -0.8396183214686561
initiating set of iterations for down 4
Cost after first iteration: -0.8375459838756985
initiating set of iterations for down 4
Cost after first iteration: -0.8359525155832057
initiating set of iterations for down 4
Cost after first iteration: -0.8347059074395052
initiating set of iterations for down 4
Cost after first iteration: -0.8337149386700062
initiating set of iterations for down 4
Cost after first iteration: -0.8329111040498579
initiating set of iterations for down 4
Cost after first iteration: -0.8322439633795056
initiating set of iterations for down 4
Cost after first iteration: -0.8316771188559837
initiating set of iterations for down 4
Cost after first iteration: -0.8311843323269258
initiating set of iterations for down 4
Cost after first iteration: -0.8307465209015028


In [359]:
# Train on down 3 data with 1 layer

for global_its in range(10):
    for d in reversed(range(2,3)):
        down = d + 1
        print(f"""initiating set of iterations for down {down}""")
        parameters, costs = L_layer_model(training_x, training_y_next, layers_dims, parameters, learning_rate = 0.001, batch_size = 128, num_iterations = 1000, down = down, last_layer_activation = 'sigmoid', single_layer_training = True)
        print("Cost after iteration: " + str(costs[0]))
    save_params(parameters)

initiating set of iterations for down 3
Cost after iteration: 0.17349669589725
initiating set of iterations for down 3
Cost after iteration: 0.03630075752012643
initiating set of iterations for down 3
Cost after iteration: 0.03335476814730211
initiating set of iterations for down 3
Cost after iteration: 0.03173226866834932
initiating set of iterations for down 3
Cost after iteration: 0.03080745526603485
initiating set of iterations for down 3
Cost after iteration: 0.030225028395372486
initiating set of iterations for down 3
Cost after iteration: 0.029818396957196437
initiating set of iterations for down 3
Cost after iteration: 0.029509052671994
initiating set of iterations for down 3
Cost after iteration: 0.029258630579135563
initiating set of iterations for down 3
Cost after iteration: 0.02904762378620012


In [362]:
# Initialize parameters W2 to equal W3
parameters['W2']=parameters['W3'].copy()
parameters['b2']=parameters['b3'].copy()

In [364]:
# Train on Down 2 data with 1 layer

for global_its in range(5):
    for d in reversed(range(1,2)):
        down = d + 1
        print(f"""initiating set of iterations for down {down}""")
        parameters, costs = L_layer_model(training_x, training_y_next, layers_dims, parameters, learning_rate = 0.001, batch_size = 128, num_iterations = 1000, down = down, last_layer_activation = 'sigmoid', single_layer_training = True)
        print("Cost after iteration: " + str(costs[0]))
    save_params(parameters)

initiating set of iterations for down 2
Cost after iteration: 0.03083283727026461
initiating set of iterations for down 2
Cost after iteration: 0.029051450547077463
initiating set of iterations for down 2
Cost after iteration: 0.02848314791531529
initiating set of iterations for down 2
Cost after iteration: 0.02809749898594459
initiating set of iterations for down 2
Cost after iteration: 0.027807161562256395


In [382]:
# Initialize parameters W1 to equal W2
parameters['W1']=parameters['W2'].copy()
parameters['b1']=parameters['b2'].copy()

In [384]:
# Train on down 1 data with 1 layer

for global_its in range(3):
    for d in reversed(range(0,1)):
        down = d + 1
        print(f"""initiating set of iterations for down {down}""")
        parameters, costs = L_layer_model(training_x, training_y_next, layers_dims, parameters, learning_rate = 0.001, batch_size = 128, num_iterations = 1000, down = down, last_layer_activation = 'sigmoid', single_layer_training = True)
        print("Cost after iteration: " + str(costs[0]))
    save_params(parameters)

In [433]:
# Initialize the last 19 (absorption state) to and from W parameters to equal the identity matrix, as there are no state transitions
# Also, intialize W so that absorption states don't return to non-absorption states

# parameters['w1'].shape >> (130 outputs - down 2 predictions, 130 inputs - down 1 data)

dims=parameters['W1'].shape[0]

for l in range(len(layers_dims) - 1):
    # Last (19,19) matrix is the absorption states which can be the identity matrix
    if l+1 == 4:
        parameters[f"""W{l+1}"""][:,dims-19:dims]=np.identity(19)
    else:
        parameters[f"""W{l+1}"""][dims-19:dims,dims-19:dims]=np.identity(19)
        # Absorption states can't return to other states so will zero such cases
        parameters[f"""W{l+1}"""][0:dims-19,dims-19:dims]=np.zeros([dims-19,19])

In [None]:
# Train on multiple downs to predict the down set absorption set using the initialized parameters from single-layer models
for global_its in range(25):
    for d in reversed(range(1,4)):
        down = d + 1
        print(f"""initiating set of iterations for down {down}""")
        parameters, costs = L_layer_model(training_x, training_y, layers_dims, parameters, learning_rate = 0.001, batch_size = 128, num_iterations = 1000, down = down, print_cost = False, last_layer_activation = 'softmax')
        print("Cost after first iteration: " + str(costs[0]))
    save_params(parameters)

initiating set of iterations for down 4
Cost after first iteration: -0.8303497701601217
initiating set of iterations for down 3
Cost after first iteration: -2.5932549371467437
initiating set of iterations for down 2


# 5 - Model Evaluation

In [443]:
# Training Set

evaldata_x = training_x
evaldata_y = training_y

for d in reversed(range(4)):
    down = d + 1
    print(f"""Evaluation for down {down}""")
    pred_down = predict(evaldata_x[:,evaldata_x[0,:]==down][0:,:], evaldata_y[:,evaldata_x[0,:]==down][:,:], parameters, inner_layer_activation = 'sigmoid', last_layer_activation = 'softmax', down = down, return_probs = False)
    m=evaldata_x[:,evaldata_x[0,:]==down].shape[1]
    unique, counts = np.unique(np.tile(np.array(absorption_states,ndmin=2).T,(1, m))[pred_down==1], return_counts=True)
    print(np.asarray((unique, counts)).T)

Evaluation for down 4
Accuracy: 0.7355875831485588
Cost: -0.8723278571070655
[['30.0' '98']
 ['40.0' '192']
 ['field_goal' '9063']
 ['punt' '20413']
 ['turnover' '902']]
Evaluation for down 3
Accuracy: 0.35379253938337896
Cost: -2.6316317440943773
[['field_goal' '5158']
 ['punt' '39526']
 ['turnover' '11558']]
Evaluation for down 2
Accuracy: 0.1895461394456874
Cost: -3.5163830068796664
[['punt' '58667']
 ['turnover' '28937']]
Evaluation for down 1
Accuracy: 0.09166843574962849
Cost: -3.8413624282966583
[['punt' '50999']
 ['turnover' '71475']]


In [445]:
# Testing Set

evaldata_x = testing_x
evaldata_y = testing_y

for d in reversed(range(4)):
    down = d + 1
    print(f"""Evaluation for down {down}""")
    pred_down = predict(evaldata_x[:,evaldata_x[0,:]==down][0:,:], evaldata_y[:,evaldata_x[0,:]==down][:,:], parameters, inner_layer_activation = 'sigmoid', last_layer_activation = 'softmax', down = down, return_probs = False)
    m=evaldata_x[:,evaldata_x[0,:]==down].shape[1]
    unique, counts = np.unique(np.tile(np.array(absorption_states,ndmin=2).T,(1, m))[pred_down==1], return_counts=True)
    print(np.asarray((unique, counts)).T)

Evaluation for down 4
Accuracy: 0.7226917057902973
Cost: nan
[['30.0' '7']
 ['40.0' '15']
 ['field_goal' '978']
 ['punt' '2101']
 ['turnover' '93']]
Evaluation for down 3
Accuracy: 0.33922802244516237
Cost: nan
[['field_goal' '547']
 ['punt' '4050']
 ['turnover' '1282']]
Evaluation for down 2
Accuracy: 0.17732588768306817
Cost: nan
[['punt' '6229']
 ['turnover' '3256']]
Evaluation for down 1
Accuracy: 0.08249158249158249
Cost: nan
[['punt' '5645']
 ['turnover' '8008']]


# 5 - Model Predictions with New Data

In [473]:
def predict_new_data(down, yardline, ydstogo, half_seconds_remaining, yardline_buckets=10, yards_to_go_max=11, secondsinhalf=1800):
    
    # Create yardline field that adjusts yardline_100 into Buckets of 10
    yardline=np.ceil(yardline/yardline_buckets)*yardline_buckets

    # Create yards to go field with a final bucket of 11+
    ydstogo=np.where(ydstogo>yards_to_go_max,yards_to_go_max,ydstogo)
    
    # Scale half_seconds remaining over total seconds in half of 1800
    half_seconds_remaining=half_seconds_remaining/secondsinhalf
    
    # Build a matrix with the intersection of yardline and ydstogo
    innerstates = np.zeros([yardline_buckets,yards_to_go_max,1])
    
    for i in range(innerstates.shape[2]):
        innerstates[(yardline/10).astype(int)-1,ydstogo-1,i]=1
        
    # Unroll the matrix horizontally
    innerstates_reshaped=innerstates.reshape(innerstates.shape[0]*innerstates.shape[1],innerstates.shape[2])
    
    # Combine data
    # Can omit concatenating down in this case as it's passed as an argument
    data_reshaped=pd.concat([pd.DataFrame({down}),pd.DataFrame(innerstates_reshaped.T),pd.DataFrame({half_seconds_remaining}),pd.DataFrame(np.zeros([1,19]))],axis=1)
    
    data_reshaped=np.array(data_reshaped).T
                             
    preds = predict(data_reshaped, None, parameters, inner_layer_activation = 'sigmoid', last_layer_activation = 'softmax', down = down, return_probs = True)
    
    df = pd.concat([pd.DataFrame(absorption_states),pd.DataFrame(preds)],axis=1)
    df.columns = ['absorption_state','predicted_probability']
    
    return df

In [472]:
yardline_buckets=10
yards_to_go_max=11
secondsinhalf=1800

# Create yardline field that adjusts yardline_100 into Buckets of 10
yardline=np.ceil(yardline/yardline_buckets)*yardline_buckets

# Create yards to go field with a final bucket of 11+
ydstogo=np.where(ydstogo>yards_to_go_max,yards_to_go_max,ydstogo)

# Scale half_seconds remaining over total seconds in half of 1800
half_seconds_remaining=half_seconds_remaining/secondsinhalf

# Build a matrix with the intersection of yardline and ydstogo
innerstates = np.zeros([yardline_buckets,yards_to_go_max,1])

for i in range(innerstates.shape[2]):
    innerstates[(yardline/10).astype(int)-1,ydstogo-1,i]=1

# Unroll the matrix horizontally
innerstates_reshaped=innerstates.reshape(innerstates.shape[0]*innerstates.shape[1],innerstates.shape[2])

# Combine data
# Can omit concatenating down in this case as it's passed as an argument
data_reshaped=pd.concat([pd.DataFrame({down}),pd.DataFrame(innerstates_reshaped.T),pd.DataFrame({half_seconds_remaining}),pd.DataFrame(np.zeros([1,19]))],axis=1)

data_reshaped=np.array(data_reshaped).T

data_reshaped.shape

# preds = predict(data_reshaped, None, parameters, inner_layer_activation = 'sigmoid', last_layer_activation = 'softmax', down = down, return_probs = True)


(131, 1)

In [474]:
down = 4
yardline = 10
ydstogo = 5
half_seconds_remaining = 250

predict_new_data(down, yardline, ydstogo, half_seconds_remaining)

Unnamed: 0,absorption_state,predicted_probability
0,10.0,0.014921
1,20.0,0.002831
2,30.0,0.002671
3,40.0,0.002718
4,50.0,0.001981
5,60.0,0.001504
6,70.0,0.000497
7,80.0,0.000744
8,90.0,0.000469
9,100.0,1.2e-05
