In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report, confusion_matrix
from catboost import CatBoostClassifier

RANDOM_SEED = 42
tf.random.set_seed(RANDOM_SEED)

## Load data and map target

In [3]:
df = pd.read_csv('../dataset/nfl_filtered.csv')
df['play_type'] = df['play_type'].map({'run': 0, 'pass': 1}).astype(int)

if 'play_id' in df.columns:
    df = df.drop(columns=['play_id'])

X_raw = df.drop(columns=['play_type'])
y = df['play_type']
categorical_cols = X_raw.select_dtypes(include=['object']).columns.tolist()
print(f'Rows: {len(df)}, features: {X_raw.shape[1]}')
print(f'Categorical cols: {categorical_cols}')

Rows: 318668, features: 19
Categorical cols: ['posteam', 'defteam', 'posteam_type', 'game_half', 'side_of_field']


## Train/val/test split (70/15/15)

In [4]:
X_train_raw, X_temp_raw, y_train, y_temp = train_test_split(
    X_raw, y, test_size=0.3, random_state=RANDOM_SEED, stratify=y
)
X_val_raw, X_test_raw, y_val, y_test = train_test_split(
    X_temp_raw, y_temp, test_size=0.5, random_state=RANDOM_SEED, stratify=y_temp
)

cat_feature_indices = [X_train_raw.columns.get_loc(col) for col in categorical_cols]
print('Splits ->', X_train_raw.shape, X_val_raw.shape, X_test_raw.shape)

Splits -> (223067, 19) (47800, 19) (47801, 19)


## Train CatBoost (base learner)

In [5]:
cat_model = CatBoostClassifier(
    loss_function='Logloss',
    eval_metric='AUC',
    depth=6,
    learning_rate=0.1,
    iterations=500,
    od_type='Iter',
    od_wait=50,
    random_seed=RANDOM_SEED,
    verbose=100
)
cat_model.fit(
    X_train_raw, y_train,
    cat_features=cat_feature_indices,
    eval_set=(X_val_raw, y_val),
    use_best_model=True
)

0:	test: 0.7760523	best: 0.7760523 (0)	total: 351ms	remaining: 2m 55s
100:	test: 0.8029238	best: 0.8029238 (100)	total: 12.2s	remaining: 48.1s
200:	test: 0.8062116	best: 0.8062116 (200)	total: 23.7s	remaining: 35.2s
300:	test: 0.8071434	best: 0.8071434 (300)	total: 35.2s	remaining: 23.2s
400:	test: 0.8076229	best: 0.8076350 (379)	total: 46.9s	remaining: 11.6s
499:	test: 0.8078728	best: 0.8078818 (488)	total: 58.5s	remaining: 0us

bestTest = 0.8078817963
bestIteration = 488

Shrink model to first 489 iterations.


<catboost.core.CatBoostClassifier at 0x23ae2a45fd0>

## CatBoost probabilities as meta-feature

In [6]:
proba_train = cat_model.predict_proba(X_train_raw)[:, 1]
proba_val = cat_model.predict_proba(X_val_raw)[:, 1]
proba_test = cat_model.predict_proba(X_test_raw)[:, 1]

print('CatBoost AUC (val):', roc_auc_score(y_val, proba_val))
print('CatBoost AUC (test):', roc_auc_score(y_test, proba_test))

CatBoost AUC (val): 0.8078817963379832
CatBoost AUC (test): 0.8054313304709432


## Label encode categorical features for the NN

In [7]:
label_encoders = {}
X_train_enc = X_train_raw.copy()
X_val_enc = X_val_raw.copy()
X_test_enc = X_test_raw.copy()

for col in categorical_cols:
    le = LabelEncoder()
    le.fit(X_train_enc[col].astype(str))
    label_encoders[col] = le
    X_train_enc[col] = le.transform(X_train_enc[col].astype(str))
    X_val_enc[col] = le.transform(X_val_enc[col].astype(str))
    X_test_enc[col] = le.transform(X_test_enc[col].astype(str))

X_train_enc['cat_proba'] = proba_train
X_val_enc['cat_proba'] = proba_val
X_test_enc['cat_proba'] = proba_test

categorical_for_nn = categorical_cols
numerical_for_nn = [c for c in X_train_enc.columns if c not in categorical_for_nn]
print('NN categorical:', categorical_for_nn)
print('NN numerical:', numerical_for_nn)

NN categorical: ['posteam', 'defteam', 'posteam_type', 'game_half', 'side_of_field']
NN numerical: ['yardline_100', 'qtr', 'down', 'ydstogo', 'goal_to_go', 'score_differential', 'drive', 'posteam_timeouts_remaining', 'defteam_timeouts_remaining', 'shotgun', 'no_huddle', 'quarter_seconds_remaining', 'half_seconds_remaining', 'game_seconds_remaining', 'cat_proba']


## Train/val/test tensors

In [8]:
X_train_cat = [X_train_enc[col].values for col in categorical_for_nn]
X_val_cat = [X_val_enc[col].values for col in categorical_for_nn]
X_test_cat = [X_test_enc[col].values for col in categorical_for_nn]

scaler = StandardScaler()
X_train_num = scaler.fit_transform(X_train_enc[numerical_for_nn])
X_val_num = scaler.transform(X_val_enc[numerical_for_nn])
X_test_num = scaler.transform(X_test_enc[numerical_for_nn])

train_inputs = X_train_cat + [X_train_num]
val_inputs = X_val_cat + [X_val_num]
test_inputs = X_test_cat + [X_test_num]

print('Prepared NN inputs:')
print('  categorical tensors:', len(categorical_for_nn))
print('  numeric shape:', X_train_num.shape)

Prepared NN inputs:
  categorical tensors: 5
  numeric shape: (223067, 15)


## Build CatBoosted MLP

In [9]:
embedding_dims = {
    'posteam': 8,
    'defteam': 8,
    'posteam_type': 2,
    'game_half': 2,
    'side_of_field': 4
}

cat_inputs = []
cat_embeds = []
for col in categorical_for_nn:
    vocab = len(label_encoders[col].classes_)
    dim = embedding_dims.get(col, 4)
    inp = keras.Input(shape=(1,), name=f'{col}_input')
    emb = keras.layers.Embedding(input_dim=vocab, output_dim=dim, name=f'{col}_emb')(inp)
    cat_inputs.append(inp)
    cat_embeds.append(keras.layers.Flatten()(emb))

cat_concat = keras.layers.Concatenate(name='cat_concat')(cat_embeds)
num_input = keras.Input(shape=(X_train_num.shape[1],), name='num_input')

combined = keras.layers.Concatenate(name='features')([cat_concat, num_input])
x = keras.layers.Dense(160, activation='relu')(combined)
x = keras.layers.Dropout(0.35)(x)
x = keras.layers.Dense(96, activation='relu')(x)
x = keras.layers.Dropout(0.25)(x)
x = keras.layers.Dense(48, activation='relu')(x)
x = keras.layers.Dropout(0.2)(x)
out = keras.layers.Dense(1, activation='sigmoid')(x)

catnn_model = keras.Model(inputs=cat_inputs + [num_input], outputs=out, name='CatBoosted_MLP')
catnn_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0008),
    loss='binary_crossentropy',
    metrics=['accuracy', keras.metrics.AUC(name='auc')]
)
catnn_model.summary()

## Train CatBoosted MLP

In [10]:
callbacks = [
    keras.callbacks.EarlyStopping(monitor='val_auc', patience=6, restore_best_weights=True, verbose=1),
    keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=4, min_lr=1e-6, verbose=1)
]
history = catnn_model.fit(
    train_inputs,
    y_train.values,
    validation_data=(val_inputs, y_val.values),
    epochs=60,
    batch_size=256,
    callbacks=callbacks,
    verbose=1
)

Epoch 1/60
[1m872/872[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7311 - auc: 0.7996 - loss: 0.5335 - val_accuracy: 0.7329 - val_auc: 0.8072 - val_loss: 0.5240 - learning_rate: 8.0000e-04
Epoch 2/60
[1m872/872[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7382 - auc: 0.8097 - loss: 0.5217 - val_accuracy: 0.7322 - val_auc: 0.8073 - val_loss: 0.5241 - learning_rate: 8.0000e-04
Epoch 3/60
[1m872/872[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7391 - auc: 0.8118 - loss: 0.5193 - val_accuracy: 0.7331 - val_auc: 0.8075 - val_loss: 0.5233 - learning_rate: 8.0000e-04
Epoch 4/60
[1m872/872[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7395 - auc: 0.8125 - loss: 0.5185 - val_accuracy: 0.7328 - val_auc: 0.8075 - val_loss: 0.5237 - learning_rate: 8.0000e-04
Epoch 5/60
[1m872/872[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7395 - auc: 

## Evaluate on test set

In [12]:
y_proba_test_catnn = catnn_model.predict(test_inputs, verbose=0).flatten()
y_pred_test_catnn = (y_proba_test_catnn >= 0.5).astype(int)

test_acc = accuracy_score(y_test, y_pred_test_catnn)
test_auc = roc_auc_score(y_test, y_proba_test_catnn)

print(f'Test Accuracy (CatBoosted MLP): {test_acc:.4f}')
print(f'Test AUC (CatBoosted MLP): {test_auc:.4f}')
print('Classification Report:', classification_report(y_test, y_pred_test_catnn))
print('Confusion Matrix:', confusion_matrix(y_test, y_pred_test_catnn))

Test Accuracy (CatBoosted MLP): 0.7303
Test AUC (CatBoosted MLP): 0.8053
Classification Report:               precision    recall  f1-score   support

           0       0.66      0.71      0.69     19876
           1       0.78      0.74      0.76     27925

    accuracy                           0.73     47801
   macro avg       0.72      0.73      0.73     47801
weighted avg       0.73      0.73      0.73     47801

Confusion Matrix: [[14190  5686]
 [ 7205 20720]]
