# NFL Big Data Bowl 2024

## Importing dataview libraries

In [40]:
import pandas as pd
import re
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [15]:
DATA_ROOT = '../data/'

In [16]:
plays = pd.read_csv(DATA_ROOT + 'plays.csv')

In [17]:
def playDescriptionToDirection(play_description):
    play = re.search('(pass (?:short|deep) (?:left|middle|right))|((?:left|right) (?:guard|tackle|end))|(up the middle)', play_description)
    return play.group(0) if play else 'FUMBLED BALL'

In [18]:
print('Direction')
plays['playDirection'] = plays['playDescription'].map(playDescriptionToDirection)

Direction


In [19]:
import tensorflow as tf

def stack_dict(inputs, fun=tf.stack):
    values = []
    for key in sorted(inputs.keys()):
      values.append(tf.cast(inputs[key], tf.float32))

    return fun(values, axis=-1)

In [20]:
print(plays['offenseFormation'])

0           SHOTGUN
1           SHOTGUN
2            I_FORM
3        SINGLEBACK
4            I_FORM
            ...    
12481    SINGLEBACK
12482    SINGLEBACK
12483       SHOTGUN
12484       SHOTGUN
12485        I_FORM
Name: offenseFormation, Length: 12486, dtype: object


In [48]:
numeric_feature_names = ['down', 'yardsToGo', 'defendersInTheBox', 'absoluteYardlineNumber']
categoric_feature_names = ['offenseFormation']
binary_feature_names = []
target_name = 'playDirection'

df = plays[numeric_feature_names + categoric_feature_names + binary_feature_names + [target_name]].copy()
df = df.dropna()
df = pd.get_dummies(df, columns=categoric_feature_names)
df = df.iloc[np.random.permutation(len(df))].reset_index(drop=True)

target = df.pop(target_name)
target = pd.get_dummies(target)

down                             int64
yardsToGo                        int64
defendersInTheBox              float64
absoluteYardlineNumber           int64
offenseFormation_EMPTY            bool
offenseFormation_I_FORM           bool
offenseFormation_JUMBO            bool
offenseFormation_PISTOL           bool
offenseFormation_SHOTGUN          bool
offenseFormation_SINGLEBACK       bool
offenseFormation_WILDCAT          bool
dtype: object
FUMBLED BALL         bool
left end             bool
left guard           bool
left tackle          bool
pass deep left       bool
pass deep middle     bool
pass deep right      bool
pass short left      bool
pass short middle    bool
pass short right     bool
right end            bool
right guard          bool
right tackle         bool
up the middle        bool
dtype: object


In [68]:
x, x_val = df[:int(len(df) * 0.8)], df[int(len(df) * 0.8):]
y, y_val = target[:int(len(target) * 0.8)], target[int(len(target) * 0.8):]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=1)
x_train = np.asarray(x_train).astype('float32')
x_test = np.asarray(x_test).astype('float32')
x_val = np.asarray(x_val).astype('float32')
y_train = np.asarray(y_train).astype('float32')
y_test = np.asarray(y_test).astype('float32')
y_val = np.asarray(y_val).astype('float32')
print(x_train.shape)

(6689, 11)


In [72]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(11 ,activation='relu', input_shape=(11,)))
model.add(tf.keras.layers.Dense(14, activation='relu'))

opt = tf.keras.optimizers.legacy.Adam(learning_rate=1e-3, decay=1e-5)

model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

In [73]:
model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_val, y_val))

Epoch 1/10


2023-11-18 14:36:37.656010: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x2d691abd0>

In [77]:
loss = model.evaluate(x_test, y_test, verbose=0)
print('Test Accuracy: %.3f' % acc)

Test Accuracy: 2.770
