In [30]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report, confusion_matrix

from catboost import CatBoostClassifier
import catboost
catboost.__version__

'1.2.8'

In [31]:
df = pd.read_csv("../dataset/nfl_encoded_v1.csv")
df.head()

Unnamed: 0,play_id,yardline_100,qtr,down,ydstogo,goal_to_go,score_differential,drive,posteam_timeouts_remaining,defteam_timeouts_remaining,...,side_of_field_OAK,side_of_field_PHI,side_of_field_PIT,side_of_field_SD,side_of_field_SEA,side_of_field_SF,side_of_field_STL,side_of_field_TB,side_of_field_TEN,side_of_field_WAS
0,68,58.0,1,1.0,10,0.0,0.0,1,3.0,3.0,...,False,False,True,False,False,False,False,False,False,False
1,92,53.0,1,2.0,5,0.0,0.0,1,3.0,3.0,...,False,False,True,False,False,False,False,False,False,False
2,113,56.0,1,3.0,8,0.0,0.0,1,3.0,3.0,...,False,False,True,False,False,False,False,False,False,False
3,162,98.0,1,1.0,10,0.0,0.0,2,3.0,3.0,...,False,False,False,False,False,False,False,False,True,False
4,183,98.0,1,2.0,10,0.0,0.0,2,3.0,3.0,...,False,False,False,False,False,False,False,False,True,False


In [32]:
if "play_id" in df.columns:
    df = df.drop(columns=["play_id"])

X = df.drop(columns=["play_type"])
y = df["play_type"]

categorical_features = X.select_dtypes(include=["object"]).columns.tolist()

print("Feature shape:", X.shape)
print("Target shape:", y.shape)
print("Categorical features:", categorical_features)

Feature shape: (318668, 120)
Target shape: (318668,)
Categorical features: []


In [33]:
#First split: train (70%) and temp (30%)
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

#Second split: validation (15%) and test (15%)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

print("Training shape:", X_train.shape)
print("Validation shape:", X_val.shape)
print("Testing shape:", X_test.shape)

Training shape: (223067, 120)
Validation shape: (47800, 120)
Testing shape: (47801, 120)


In [34]:
cat_feature_indices = [X.columns.get_loc(col) for col in categorical_features]

print("Categorical features:", categorical_features)
print("Categorical feature indices:", cat_feature_indices)

Categorical features: []
Categorical feature indices: []


In [35]:
cat_model = CatBoostClassifier(
    loss_function='Logloss',
    eval_metric='AUC',
    depth=6,
    learning_rate=0.1,
    iterations=500,
    od_type='Iter',
    od_wait=50,
    random_seed=42,
    verbose=100 #prints training progress every 100 iterations
)


In [36]:
cat_model.fit(
    X_train, y_train, cat_features=cat_feature_indices,
    eval_set=(X_val, y_val), use_best_model=True
)

0:	test: 0.7757842	best: 0.7757842 (0)	total: 34.5ms	remaining: 17.2s
100:	test: 0.8026231	best: 0.8026231 (100)	total: 2.74s	remaining: 10.8s
200:	test: 0.8065102	best: 0.8065124 (198)	total: 5s	remaining: 7.44s
300:	test: 0.8078534	best: 0.8078565 (299)	total: 7.2s	remaining: 4.76s
400:	test: 0.8086672	best: 0.8086672 (400)	total: 9.4s	remaining: 2.32s
499:	test: 0.8088268	best: 0.8088478 (491)	total: 12.3s	remaining: 0us

bestTest = 0.8088478471
bestIteration = 491

Shrink model to first 492 iterations.


<catboost.core.CatBoostClassifier at 0x202daa780d0>

Extract CatBoost Leaf Indices

In [37]:
# Leaf indices for each sample
train_leaves = cat_model.calc_leaf_indexes(catboost.Pool(X_train, cat_features=cat_feature_indices))
val_leaves   = cat_model.calc_leaf_indexes(catboost.Pool(X_val,   cat_features=cat_feature_indices))
test_leaves  = cat_model.calc_leaf_indexes(catboost.Pool(X_test,  cat_features=cat_feature_indices))


n_trees = train_leaves.shape[1]
n_leaf_values = train_leaves.max() + 1


Convert leaves into embeddings in PyTorch

In [38]:
import torch
from torch.utils.data import Dataset
import numpy as np

class PlayDataset(Dataset):
    def __init__(self, X_numeric, leaf_indices, y):
        # Convert DataFrame to float32 numpy array
        self.X_numeric = torch.tensor(X_numeric.values.astype(np.float32))
        self.leaf_indices = torch.tensor(leaf_indices.astype(np.int64))
        self.y = torch.tensor(y.values.astype(np.float32))

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X_numeric[idx], self.leaf_indices[idx], self.y[idx]



Neural Network with CatBoost Leaf Embeddings
- Each tree has about 30-60 leaves 9depending on depth)
- We embed each tree's leaf index and then concat them.

In [39]:
import torch.nn as nn
import torch.nn.functional as F

class CatBoostEmbeddingMLP(nn.Module):
    def __init__(self, n_numeric_features, n_trees, n_leaf_values, embed_dim=8):
        super().__init__()

        # Embedding layer: (total possible leaf ids → dense vectors)
        self.leaf_emb = nn.Embedding(n_leaf_values, embed_dim)

        # MLP input size = numeric features + (trees × embed_dim)
        mlp_input_dim = n_numeric_features + n_trees * embed_dim

        self.fc1 = nn.Linear(mlp_input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)

    def forward(self, x_num, leaf_idx):
        # leaf_idx shape: (batch_size, n_trees)
        leaf_vectors = self.leaf_emb(leaf_idx)          # (batch, n_trees, embed_dim)
        leaf_vectors = leaf_vectors.view(leaf_vectors.size(0), -1)

        x = torch.cat([x_num, leaf_vectors], dim=1)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return torch.sigmoid(self.fc3(x))


Training Loop

In [43]:
from torch.utils.data import DataLoader
import torch.optim as optim

train_dataset = PlayDataset(X_train, train_leaves, y_train)
val_dataset   = PlayDataset(X_val, val_leaves, y_val)
test_dataset = PlayDataset(X_test, test_leaves, y_test)

train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=256)
test_loader  = DataLoader(test_dataset, batch_size=1024, shuffle=False)

model = CatBoostEmbeddingMLP(
    n_numeric_features=X_train.shape[1],
    n_trees=n_trees,
    n_leaf_values=n_leaf_values,
    embed_dim=8
)

optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.BCELoss()


Training

In [41]:
for epoch in range(10):
    model.train()
    for x_num, leaf_idx, y in train_loader:
        optimizer.zero_grad()
        pred = model(x_num, leaf_idx).squeeze()
        loss = criterion(pred, y)
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1} complete")


Epoch 1 complete
Epoch 2 complete
Epoch 3 complete
Epoch 4 complete
Epoch 5 complete
Epoch 6 complete
Epoch 7 complete
Epoch 8 complete
Epoch 9 complete
Epoch 10 complete


Predict

In [44]:
model.eval()
all_preds = []

with torch.no_grad():
    for x_num, leaf_idx, _ in test_loader:
        pred = model(x_num, leaf_idx).squeeze()
        all_preds.append(pred.cpu().numpy())

all_preds = np.concatenate(all_preds)

# 4. Convert to class labels
test_labels = (all_preds >= 0.5).astype(int)

print(all_preds[:10])   # probabilities
print(test_labels[:10]) # 0/1 labels

[0.48080644 0.43507108 0.44353333 0.46506396 0.18660279 0.9780633
 0.98092204 0.43333912 0.9795593  0.98371774]
[0 0 0 0 0 1 1 0 1 1]


Evaluate Model

In [45]:
# --- Evaluate model ---
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report, confusion_matrix

# Convert y_test (Pandas Series → numpy array)
y_test_np = y_test.values

# Accuracy
acc = accuracy_score(y_test_np, test_labels)

# AUC (needs probabilities, not labels)
auc = roc_auc_score(y_test_np, all_preds)

# Confusion matrix
cm = confusion_matrix(y_test_np, test_labels)

# Classification report
report = classification_report(y_test_np, test_labels)

print("Accuracy:", acc)
print("AUC:", auc)
print("\nConfusion Matrix:\n", cm)
print("\nClassification Report:\n", report)


Accuracy: 0.5147172653291772
AUC: 0.4842270465426377

Confusion Matrix:
 [[ 9815 10061]
 [13136 14789]]

Classification Report:
               precision    recall  f1-score   support

           0       0.43      0.49      0.46     19876
           1       0.60      0.53      0.56     27925

    accuracy                           0.51     47801
   macro avg       0.51      0.51      0.51     47801
weighted avg       0.53      0.51      0.52     47801

