# Dense Neural Network Framework to Predict NFL Down Set Outcome Distribution


<a name='1'></a>
## 1 - Load Packages

In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
import os
from importlib import reload
import dnn_utils

In [2]:
# Reload utils if necessary
dnn_utils=reload(dnn_utils)
from dnn_utils import *

In [3]:
# Set random seed for reproducibility
np.random.seed(1)

## 2 - Data Preparation Part 1
Initial data cleaning, labelling, and feature selection.

In [4]:
# Load Dataset
# Source - https://www.kaggle.com/datasets/maxhorowitz/nflplaybyplay2009to2016?resource=download

dataset=pd.read_csv("NFL Play by Play 2009-2018 (v5).csv")

FileNotFoundError: [Errno 2] No such file or directory: 'NFL Play by Play 2009-2018 (v5).csv'

In [None]:
# Observe First Records
dataset.head(5)

In [None]:
# List Columns
list(dataset.columns)

In [None]:
# Remove Duplicates and Erroneous/Irrelevant Records

dataset=dataset[dataset.duplicated()==False]
dataset=dataset[dataset['down'].isnull()==False]
dataset=dataset[dataset['yardline_100'].isnull()==False]
len(dataset)

In [None]:
# Create yardline field that adjusts yardline_100 into Buckets of 10
yardline_buckets=10
dataset['yardline']=np.ceil(dataset['yardline_100']/yardline_buckets)*yardline_buckets
dataset['yardline'].value_counts().sort_index()

In [None]:
# Create yards to go field with a final bucket of 11+
dataset['ydstogo_archive']=dataset['ydstogo']
yards_to_go_max=11
dataset['ydstogo']=np.where(dataset['ydstogo']>yards_to_go_max,yards_to_go_max,dataset['ydstogo'])
dataset['ydstogo'].value_counts().sort_index()

In [None]:
# Convert yards_gained to Int64
dataset['yards_gained_archive']=dataset['yards_gained']
dataset['yards_gained']=dataset['yards_gained'].astype('Int64')

In [None]:
# Because of the lack on monotonicity observed in some cases with play_id >
# in the loop, I will rebuild the play_id to enumerate from 1:p in a game based on the dataset order
dataset['play_id_archive']=dataset['play_id']

In [None]:
# 1. Create a drive_play feature that enumerates plays from 1:n in each drive.
# 2. A down set can only last 4 downs so we only need to look within +/- 3 entries to find other plays in the down set. 
# 2. I can run a loop through all games, drives, and plays that adds a down each time there is a new down 1

In [None]:
# Find down set through a loop
# Likely can't be vectorized as finding the drive set requires cumulative information from the drive
dataset['down']=dataset['down'].apply(int)
down_set_list=[]
down_set_id_list=[]
down_set_id=0
play_in_game_list=[]
for g, game_id in enumerate(dataset['game_id'].unique(), start=1):
    game_dataset=dataset[dataset['game_id']==game_id]
    play_in_game=1
    for d, drive in enumerate(game_dataset['drive'].unique(), start=1):
        drive_dataset=game_dataset[game_dataset['drive']==drive]
        for p, play_id in enumerate(drive_dataset['play_id'].unique(), start=1):
            play_in_game+=1
            if p==1: 
                down_set=1
                down_set_id+=1
            elif drive_dataset.iloc[p-1,:]['down']==1:
                down_set+=1
            down_set_list.append(down_set)
            down_set_id_list.append(down_set_id)
            play_in_game_list.append(play_in_game)
dataset['down_set']=down_set_list
dataset['down_set_id']=down_set_id_list
dataset['play_id']=play_in_game_list

In [None]:
# Observe Result
dataset.loc[dataset['game_id']==2009091000,['game_id','drive','down','down_set','posteam']].head(15)

In [None]:
# Build requirements to find absorption states

# Create identifier for the last play in a down set
dataset['down_set_max_play']=dataset.groupby(['game_id','drive','down_set'])['play_id'].transform(max)

# Create identifier for the last play in a drive
dataset['drive_max_play']=dataset.groupby(['game_id','drive'])['play_id'].transform(max)

# Create identifier for the last play in a game
dataset['game_half_max_play']=dataset.groupby(['game_id','game_half'])['play_id'].transform(max)

# Find yardline of next down_set
# dataset=dataset.drop(['yardline_next_down_set'],axis=1)
first_down=dataset.loc[dataset['down']==1,['game_id','drive','down_set','yardline']].rename(columns={'yardline': 'yardline_next_down_set'})
first_down['down_set']=first_down['down_set']-1
dataset=dataset.merge(first_down,how='left',on=['game_id','drive','down_set'])

# Filter to the last down in the down_set
last_play_in_down_set=dataset[dataset['play_id']==dataset['down_set_max_play']]

# Observe Result
dataset.loc[dataset['game_id']==2009091000,['game_id','drive','down','down_set','play_id','yardline','posteam','down_set_max_play','drive_max_play','game_half_max_play','yardline_next_down_set']].head(20)

In [None]:
# Find the absorption state of the down set

# 19 different options
# 10 options where the drive continues
# One for each yardline position
# 9 options where the drive ends
# touchdown, field goal, safety, missed field goal, fumble, interception, turnover on downs, punt, end of half or game.

last_play_in_down_set['absorption_state']=np.where(last_play_in_down_set['play_id']!=last_play_in_down_set['drive_max_play'],last_play_in_down_set['yardline_next_down_set'].apply(str),
                                              np.where(last_play_in_down_set['touchdown']==1,'touchdown',np.where(last_play_in_down_set['field_goal_result']=='made','field_goal',np.where(last_play_in_down_set['safety']==1,'safety',np.where(last_play_in_down_set['field_goal_result'].isin(['missed','blocked']),'missed_field_goal',np.where(last_play_in_down_set['fumble']==1,'fumble',np.where(last_play_in_down_set['interception']==1,'interception',np.where(last_play_in_down_set['kick_distance'].isnull()==False,'punt',np.where(last_play_in_down_set['play_id']==last_play_in_down_set['game_half_max_play'],'end_of_half','turnover')))))))))

In [None]:
# Observe down set absorption states

print(last_play_in_down_set['absorption_state'].isnull().value_counts())
print(len(last_play_in_down_set['absorption_state'].unique()))
last_play_in_down_set['absorption_state'].value_counts()

In [None]:
# Observe results for a game

# last_play_in_down_set.loc[(last_play_in_down_set['absorption_state']=='nan'),['game_id','drive','down','down_set','play_id','down_set_max_play','drive_max_play','game_half_max_play','yardline','posteam','yardline_next_down_set','touchdown','field_goal_result','safety','fumble','interception','kick_distance','half_seconds_remaining','game_seconds_remaining','ydstogo_archive','yards_gained','penalty','absorption_state']].head(20)
last_play_in_down_set.loc[last_play_in_down_set['game_id']==2009091000,['game_id','drive','down','down_set','play_id','down_set_max_play','drive_max_play','game_half_max_play','yardline','posteam','yardline_next_down_set','touchdown','field_goal_result','safety','fumble','interception','kick_distance','half_seconds_remaining','game_seconds_remaining','ydstogo_archive','yards_gained','penalty','absorption_state']]

In [None]:
# Join absorption_state to the dataset with all plays

dataset_len_before=len(dataset)
dataset=dataset.merge(last_play_in_down_set.loc[:,['game_id','drive','down_set','absorption_state']],how='left',on=['game_id','drive','down_set'])
assert len(dataset) == dataset_len_before
dataset.head()

In [None]:
# Select only required columns

# dataset=dataset_archive
dataset_archive=dataset

dataset=dataset.loc[:,['game_id','game_date','drive','down_set','down_set_id','play_id','down','yardline','ydstogo','absorption_state','half_seconds_remaining']]

dataset.head()

In [None]:
# Save dataset after part 1 data preparation
dataset_dataprep_part1=dataset

In [None]:
dataset=dataset_dataprep_part1

# 3 - Data Preparation Part 2
Preparing data for the neural network

In [None]:
# Scale half_seconds remaining over total seconds in half of 1800
dataset['half_seconds_remaining']=dataset['half_seconds_remaining']/1800
print(dataset['half_seconds_remaining'].max())
print(dataset['half_seconds_remaining'].min())
dataset['half_seconds_remaining'].head(10)

In [None]:
# Build a matrix with the intersection of yardline and ydstogo
# In the future, look to vectorize the for loop

innerstates = np.zeros([10,11,len(dataset)])

for i in range(innerstates.shape[2]):
    innerstates[(dataset['yardline']/10).astype(int)[i]-1,dataset['ydstogo'][i]-1,i]=1

innerstates

In [None]:
# Observe results
innerstates[:,:,0]

In [None]:
# Unroll the matrix horizontally
innerstates_reshaped=innerstates.reshape(innerstates.shape[0]*innerstates.shape[1],innerstates.shape[2])
innerstates_reshaped.shape

In [None]:
# Verify horizontal unrolling & observe results
i=0
yardline=(dataset['yardline']/10).astype(int)[i]
ydstogo=dataset['ydstogo'][i]
# print(yardline)
# print(ydstogo)
print((yardline-1)*11+ydstogo-1)
assert ((yardline-1)*11+ydstogo-1) == list(innerstates_reshaped[:,i]).index(1)
innerstates_reshaped[:,0]

In [None]:
# Build absorption states
# Build a matrix with the intersection of yardline and ydstogo
# In the future, look to vectorize the for loop

print(dataset['absorption_state'].unique())

absorption_states=['10.0', '20.0', '30.0', '40.0', '50.0', '60.0', '70.0', '80.0', '90.0', '100.0',
 'punt', 'missed_field_goal', 'interception', 'touchdown', 'fumble', 'field_goal', 'end_of_half', 'turnover', 'safety']

print(absorption_states)

absorption_states_reshaped = np.zeros([len(absorption_states),len(dataset)])

absorption_states_reshaped

for i in range(absorption_states_reshaped.shape[1]):
    absorption_states_reshaped[absorption_states.index(dataset['absorption_state'][i]),i]=1

absorption_states_reshaped

In [None]:
# Observe results
print(absorption_states.index(dataset['absorption_state'][i]))
list(absorption_states_reshaped[:,i])

In [None]:
# Partition data into training, hypertuning, and testing
# I will use 2009-2016, 2017, and 2018, for each respectively
dataset['partition']=np.where(pd.DatetimeIndex(dataset['game_date']).year==2018,'testing',np.where(pd.DatetimeIndex(dataset['game_date']).year==2017,'hypertuning','training'))
# Games by partition
print(dataset.groupby(['partition']).game_id.nunique().sort_values(ascending=False))
print(dataset.groupby(['partition']).down_set_id.nunique().sort_values(ascending=False))

In [None]:
# Combine data
# identifiers (3) + innerstates (110) + features (1) + absorptionstates (19) = 
dataset_reshaped=pd.concat([dataset.loc[:,['down_set_id','down','partition']],pd.DataFrame(innerstates_reshaped.T),dataset.loc[:,['half_seconds_remaining']],pd.DataFrame(absorption_states_reshaped.T)],axis=1)
print(dataset_reshaped.shape)
dataset_reshaped.head()

In [None]:
# Separate each partition
training_withid=dataset_reshaped[dataset_reshaped['partition']=='training'].drop(['partition'],axis=1)
hypertuning_withid=dataset_reshaped[dataset_reshaped['partition']=='hypertuning'].drop(['partition'],axis=1)
testing_withid=dataset_reshaped[dataset_reshaped['partition']=='testing'].drop(['partition'],axis=1)

training=training_withid.drop(['down_set_id'],axis=1)
hypertuning=hypertuning_withid.drop(['down_set_id'],axis=1)
testing=testing_withid.drop(['down_set_id'],axis=1)

training_x=np.array(training.iloc[:,0:(len(training.columns)-19)]).T
hypertuning_x=np.array(hypertuning.iloc[:,0:(len(hypertuning.columns)-19)]).T
testing_x=np.array(testing.iloc[:,0:(len(testing.columns)-19)]).T

training_y=np.array(training.iloc[:,(len(training.columns)-19):len(training.columns)]).T
hypertuning_y=np.array(hypertuning.iloc[:,(len(hypertuning.columns)-19):len(hypertuning.columns)]).T
testing_y=np.array(testing.iloc[:,(len(testing.columns)-19):len(testing.columns)]).T

In [None]:
# Observe training records by down set
print(dataset.groupby(['down']).down_set_id.nunique().sort_values(ascending=False))

In [None]:
# Confirm shapes
# shape = (n_x/n_y, m training examples)
print(training_x.shape)
print(training_y.shape)

In [None]:
# Can get data for a down as follows
down=4
print(training_x[:,training_x[0,:]==down][1:,:].shape)
training_x[:,training_x[0,:]==down][1:,:]

# 4 - Model Training

In [63]:
# Define function to save params locally

def save_params(parameters,path = 'modelparams'):
    time=str(datetime.now())[0:10]+'--'+str(datetime.now())[11:13]+'-'+str(datetime.now())[14:16]
    filename = f"""params-{time}.npy"""
    np.save(os.path.join(path,filename),  parameters)

In [64]:
def load_params(filename, path = "modelparams"):
    parameters=np.load(os.path.join(path,filename),allow_pickle=True)
    return np.array(parameters, ndmin=1)[0]

In [65]:
def load_last_params(path = "modelparams"):
    for i, filename in enumerate(reversed(os.listdir(path))):
        if i==0: parameters=np.load(os.path.join(path,filename),allow_pickle=True)
        return np.array(parameters, ndmin=1)[0]

In [66]:
# Define training function

def L_layer_model(X, Y, layers_dims, parameters, learning_rate = 0.001, batch_size = 1000, num_iterations = 3000, down = 4, print_cost = False, last_layer_activation = 'softmax'):
    """
    Implements a L-layer neural network: [LINEAR->RELU]*(L-1)->LINEAR->last_layer_activation.
    
    Arguments:
    X -- input data, of shape (n_x, number of examples)
    Y -- true "label" vector of absorption states (contains 1 for the actual absorption state and 0 otherwise), of shape (19, number of examples)
    layers_dims -- list containing the input size and each layer size, of length (number of layers + 1).
    learning_rate -- learning rate of the gradient descent update rule
    num_iterations -- number of iterations of the optimization loop
    print_cost -- if True, it prints the cost every 100 steps
    
    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    
    # Get Layer Count
    
    L = len(parameters) // 2
    
    # Subset full down data to only the specified down
    
    matching_down = X[0,:]==down
        
    X = X[:,matching_down][1:,:]
    Y = Y[:,matching_down]

    np.random.seed(1)
    costs = []                         # keep track of cost
    
    # Parameters initialization.
    if parameters is None:
        parameters = initialize_parameters_deep(layers_dims)
        
    # Define mini batch function
        
    def random_mini_batches(X,Y,minibatch_size):

        m = Y.shape[1]            # number of examples

        # Lets shuffle X and Y
        permutation = list(np.random.permutation(m))            # shuffled index of examples
        shuffled_X = X[:, permutation]
        shuffled_Y = Y[:, permutation]

        minibatches = []                                        # we will append all minibatch_Xs and minibatch_Ys to this minibatch list 
        number_of_minibatches = int(m/minibatch_size)           # number of mini batches 

        for k in range(number_of_minibatches):
            minibatch_X = shuffled_X[:,k*minibatch_size: (k+1)*minibatch_size ]
            minibatch_Y = shuffled_Y[:,k*minibatch_size: (k+1)*minibatch_size ]
            minibatch_pair = (minibatch_X , minibatch_Y)                        #tuple of minibatch_X and miinibatch_Y
            minibatches.append(minibatch_pair)
        if m%minibatch_size != 0 :
            last_minibatch_X = shuffled_X[:,(k+1)*minibatch_size: m ]
            last_minibatch_Y = shuffled_Y[:,(k+1)*minibatch_size: m ]
            last_minibatch_pair = (last_minibatch_X , last_minibatch_Y)
            minibatches.append(last_minibatch_pair)
        return minibatches
        
    # Loop (gradient descent)
    for i in range(0, num_iterations):
        
        batches = random_mini_batches(X, Y, batch_size)
        
        for i, batch in enumerate(batches):
                    
            X_b, Y_b = batch

            # Forward propagation: [LINEAR -> RELU]*(L-1) -> LINEAR -> last_layer_activation.
            AL, caches = L_model_forward(X_b, parameters, last_layer_activation, down)

            # Return results for debugging
    #         return AL, Y, caches

            # Compute cost.
            cost = compute_cost(AL, Y_b, last_layer_activation)
            
            # Backward propagation.
            grads = L_model_backward(AL, Y_b, caches, last_layer_activation, L, down)

            # Update parameters.
            parameters = update_parameters(parameters, grads, learning_rate, down)

            # Print the cost every 100 iterations
            if print_cost and i % 100 == 0 or i == num_iterations - 1:
                print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))
            if i % 100 == 0 or i == num_iterations:
                costs.append(cost)
    
    return parameters, costs

In [9]:
layers_dims=(111,111,111,111,19)

In [1092]:
# Parameters initialization.
parameters = initialize_parameters_deep(layers_dims)

In [None]:
parameters=load_last_params()

In [106]:
# Test Single Run
# down=4
# parameters_test, costs = L_layer_model(training_x, training_y, layers_dims, parameters, learning_rate = 0.001, batch_size = 100, num_iterations = 1000, down = down, print_cost = False, last_layer_activation = 'softmax')

# print("Cost after first iteration: " + str(costs[0]))

In [1429]:
# Train on Down 4 data with 1 layer
for global_its in range(1):
    for d in reversed(range(3,4)):
        down = d + 1
        print(f"""initiating set of iterations for down {down}""")
        parameters, costs = L_layer_model(training_x, training_y, layers_dims, parameters, learning_rate = 0.001, batch_size = 100, num_iterations = 1000, down = down, print_cost = False, last_layer_activation = 'softmax')
        print("Cost after first iteration: " + str(costs[0]))
    save_params(parameters)

initiating set of iterations for down 4
Cost after first iteration: -0.6907273657412941


In [1191]:
# Train on Down 3 data with 2 layers
for global_its in range(10):
    for d in reversed(range(2,3)):
        down = d + 1
        print(f"""initiating set of iterations for down {down}""")
        parameters, costs = L_layer_model(training_x, training_y, layers_dims, parameters, learning_rate = 0.001, batch_size = 100, num_iterations = 1000, down = down, print_cost = False, last_layer_activation = 'softmax')
        print("Cost after first iteration: " + str(costs[0]))
    save_params(parameters)

In [None]:
# Initialize Parameters W2 to be W3
parameters['W2']=parameters['W3']

In [1433]:
# Train on Down 2 data with 3 layers
for global_its in range(10):
    for d in reversed(range(1,2)):
        down = d + 1
        print(f"""initiating set of iterations for down {down}""")
        parameters, costs = L_layer_model(training_x, training_y, layers_dims, parameters, learning_rate = 0.001, batch_size = 100, num_iterations = 1000, down = down, print_cost = False, last_layer_activation = 'softmax')
        print("Cost after first iteration: " + str(costs[0]))
    save_params(parameters)

In [1263]:
# Initialize Parameters W1 to be W2
parameters['W1']=parameters['W2']

In [None]:
# Train on Down 1 data with 4 layers
for global_its in range(10):
    for d in reversed(range(0,1)):
        down = d + 1
        print(f"""initiating set of iterations for down {down}""")
        parameters, costs = L_layer_model(training_x, training_y, layers_dims, parameters, learning_rate = 0.001, batch_size = 100, num_iterations = 1000, down = down, print_cost = False, last_layer_activation = 'softmax')
        print("Cost after first iteration: " + str(costs[0]))
    save_params(parameters)

In [105]:
# Continue training on all downs
for global_its in range(5):
    for d in reversed(range(1,4)):
        down = d + 1
        print(f"""initiating set of iterations for down {down}""")
        parameters, costs = L_layer_model(training_x, training_y, layers_dims, parameters, learning_rate = 0.001, batch_size = 100, num_iterations = 1000, down = down, print_cost = False, last_layer_activation = 'softmax')
        print("Cost after first iteration: " + str(costs[0]))
    save_params(parameters)

initiating set of iterations for down 4
Cost after first iteration: -0.6981177381690686
initiating set of iterations for down 3
Cost after first iteration: -1.9352470667360313
initiating set of iterations for down 2
Cost after first iteration: -1.808823686567675
initiating set of iterations for down 4
Cost after first iteration: -0.6985970863124373
initiating set of iterations for down 3
Cost after first iteration: -1.868319395730734
initiating set of iterations for down 2
Cost after first iteration: -1.8016907791895451
initiating set of iterations for down 4
Cost after first iteration: -0.696117127419958
initiating set of iterations for down 3
Cost after first iteration: -1.87622006012111
initiating set of iterations for down 2
Cost after first iteration: -1.7963043668423462
initiating set of iterations for down 4
Cost after first iteration: -0.6934956805849904
initiating set of iterations for down 3
Cost after first iteration: -1.9155381917053242
initiating set of iterations for down

# 5 - Model Evaluation

In [109]:
# Training Set

evaldata_x = training_x
evaldata_y = training_y

for d in reversed(range(4)):
    down = d + 1
    print(f"""Evaluation for down {down}""")
    pred_down = predict(evaldata_x[:,evaldata_x[0,:]==down][0:,:], evaldata_y[:,evaldata_x[0,:]==down][:,:], parameters, last_layer_activation = 'softmax', down = down, return_probs = False)
    m=evaldata_x[:,evaldata_x[0,:]==down].shape[1]
    unique, counts = np.unique(np.tile(np.array(absorption_states,ndmin=2).T,(1, m))[pred_down==1], return_counts=True)
    print(np.asarray((unique, counts)).T)

Evaluation for down 4
Accuracy: 0.7326651538073626
Cost: -0.8900151975281239
[['30.0' '87']
 ['40.0' '147']
 ['field_goal' '8780']
 ['missed_field_goal' '96']
 ['punt' '21644']
 ['turnover' '974']]
Evaluation for down 3
Accuracy: 0.40621836831095215
Cost: -1.7920493522810965
[['10.0' '1846']
 ['20.0' '3783']
 ['30.0' '2573']
 ['40.0' '2192']
 ['50.0' '344']
 ['60.0' '965']
 ['70.0' '850']
 ['90.0' '63']
 ['end_of_half' '283']
 ['field_goal' '4032']
 ['missed_field_goal' '24']
 ['punt' '38163']
 ['touchdown' '3682']
 ['turnover' '476']]
Evaluation for down 2
Accuracy: 0.38842182167119194
Cost: -1.7112432636505381
[['10.0' '3476']
 ['20.0' '5036']
 ['30.0' '7334']
 ['40.0' '4869']
 ['50.0' '5119']
 ['60.0' '4625']
 ['70.0' '6230']
 ['80.0' '1472']
 ['90.0' '254']
 ['end_of_half' '2112']
 ['field_goal' '7103']
 ['punt' '38183']
 ['touchdown' '5894']]
Evaluation for down 1
Accuracy: 0.025450299655437072
Cost: -5.69507494582952
[['end_of_half' '122474']]


In [110]:
# Testing Set

evaldata_x = testing_x
evaldata_y = testing_y

for d in reversed(range(4)):
    down = d + 1
    print(f"""Evaluation for down {down}""")
    pred_down = predict(evaldata_x[:,evaldata_x[0,:]==down][0:,:], evaldata_y[:,evaldata_x[0,:]==down][:,:], parameters, last_layer_activation = 'softmax', down = down, return_probs = False)
    m=evaldata_x[:,evaldata_x[0,:]==down].shape[1]
    unique, counts = np.unique(np.tile(np.array(absorption_states,ndmin=2).T,(1, m))[pred_down==1], return_counts=True)
    print(np.asarray((unique, counts)).T)

Evaluation for down 4
Accuracy: 0.7190909090909091
Cost: nan
[['30.0' '6']
 ['40.0' '12']
 ['field_goal' '948']
 ['missed_field_goal' '11']
 ['punt' '2224']
 ['turnover' '98']]
Evaluation for down 3
Accuracy: 0.4012872083668544
Cost: nan
[['10.0' '189']
 ['20.0' '419']
 ['30.0' '270']
 ['40.0' '214']
 ['50.0' '35']
 ['60.0' '113']
 ['70.0' '52']
 ['80.0' '1']
 ['90.0' '9']
 ['end_of_half' '30']
 ['field_goal' '435']
 ['missed_field_goal' '1']
 ['punt' '3953']
 ['touchdown' '426']
 ['turnover' '66']]
Evaluation for down 2
Accuracy: 0.3751505419510237
Cost: nan
[['10.0' '374']
 ['20.0' '603']
 ['30.0' '783']
 ['40.0' '568']
 ['50.0' '588']
 ['60.0' '674']
 ['70.0' '552']
 ['80.0' '156']
 ['90.0' '25']
 ['end_of_half' '235']
 ['field_goal' '789']
 ['punt' '3949']
 ['touchdown' '662']]
Evaluation for down 1
Accuracy: 0.025032938076416336
Cost: nan
[['end_of_half' '13653']]


# 5 - Model Predictions with New Data

In [111]:
def predict_new_data(down, yardline, ydstogo, half_seconds_remaining, yardline_buckets=10, yards_to_go_max=11, secondsinhalf=1800):
    
    # Create yardline field that adjusts yardline_100 into Buckets of 10
    yardline=np.ceil(yardline/yardline_buckets)*yardline_buckets

    # Create yards to go field with a final bucket of 11+
    ydstogo=np.where(ydstogo>yards_to_go_max,yards_to_go_max,ydstogo)
    
    # Scale half_seconds remaining over total seconds in half of 1800
    half_seconds_remaining=half_seconds_remaining/secondsinhalf
    
    # Build a matrix with the intersection of yardline and ydstogo
    innerstates = np.zeros([yardline_buckets,yards_to_go_max,1])
    
    for i in range(innerstates.shape[2]):
        innerstates[(yardline/10).astype(int)-1,ydstogo-1,i]=1
        
    # Unroll the matrix horizontally
    innerstates_reshaped=innerstates.reshape(innerstates.shape[0]*innerstates.shape[1],innerstates.shape[2])
    
    # Combine data
    # Can omit concatenating down in this case as it's passed as an argument
    data_reshaped=pd.concat([pd.DataFrame({down}),pd.DataFrame(innerstates_reshaped.T),pd.DataFrame({half_seconds_remaining})],axis=1)
    
    data_reshaped=np.array(data_reshaped).T
                             
    preds = predict(data_reshaped, None, parameters, last_layer_activation = 'softmax', down = down, return_probs = True)
    
    df = pd.concat([pd.DataFrame(absorption_states),pd.DataFrame(preds)],axis=1)
    df.columns = ['absorption_state','predicted_probability']
    
    return df

In [None]:
parameters=load_last_params()

In [112]:
down = 1
yardline = 10
ydstogo = 5
half_seconds_remaining = 250

predict_new_data(down, yardline, ydstogo, half_seconds_remaining)

Unnamed: 0,absorption_state,predicted_probability
0,10.0,0.014149
1,20.0,0.014022
2,30.0,0.008544
3,40.0,0.003812
4,50.0,0.001513
5,60.0,0.000641
6,70.0,0.00022
7,80.0,2.5e-05
8,90.0,8e-06
9,100.0,2.1e-05
