In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

In [9]:
df = pd.read_csv("data/allHorizonData_cut.csv")
df['c5'] = df['c5'] - 1

features = ['gameLength', 'uc', 'r1', 'r2', 'r3', 'r4', 'c1', 'c2', 'c3', 'c4']
target = 'c5'

X = df[features].values
y = df[target].values

# Normalize input
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train/val split
X_train, X_val, y_train, y_val = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

# Convert to PyTorch
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=64, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val_tensor, y_val_tensor), batch_size=64)


In [10]:
unique_vals = torch.unique(y_val_tensor)
print("Unique label values:", unique_vals)

Unique label values: tensor([0., 1.])


In [11]:
class DeepMLP(nn.Module):
    def __init__(self, input_dim, hidden_dims=[64, 32, 16], dropout=0.3):
        super(DeepMLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dims[0])
        self.bn1 = nn.BatchNorm1d(hidden_dims[0])

        self.fc2 = nn.Linear(hidden_dims[0], hidden_dims[1])
        self.bn2 = nn.BatchNorm1d(hidden_dims[1])

        self.fc3 = nn.Linear(hidden_dims[1], hidden_dims[2])
        self.bn3 = nn.BatchNorm1d(hidden_dims[2])

        self.output = nn.Linear(hidden_dims[2], 1)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = F.relu(self.bn3(self.fc3(x)))
        x = self.dropout(x)
        return torch.sigmoid(self.output(x))


In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = DeepMLP(input_dim=X_train.shape[1]).to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 100

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        preds = model(xb).squeeze()
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    if epoch % 10 == 0:
        model.eval()
        with torch.no_grad():
            val_preds = model(X_val_tensor.to(device)).squeeze()
            val_binary = (val_preds > 0.5).int().cpu()
            acc = (val_binary == y_val_tensor.int()).float().mean()
            print(f"Epoch {epoch} | Val Acc: {acc:.4f} | Loss: {total_loss:.4f}")


Epoch 0 | Val Acc: 0.7628 | Loss: 157.6974
Epoch 10 | Val Acc: 0.7885 | Loss: 121.8145
Epoch 20 | Val Acc: 0.7935 | Loss: 118.6439
Epoch 30 | Val Acc: 0.7937 | Loss: 118.1194
Epoch 40 | Val Acc: 0.7922 | Loss: 117.9861
Epoch 50 | Val Acc: 0.7945 | Loss: 117.4256
Epoch 60 | Val Acc: 0.7898 | Loss: 116.7513
Epoch 70 | Val Acc: 0.7922 | Loss: 117.9446
Epoch 80 | Val Acc: 0.7922 | Loss: 116.8434
Epoch 90 | Val Acc: 0.7935 | Loss: 115.5158


In [13]:
model.eval()
with torch.no_grad():
    final_preds = model(X_val_tensor.to(device)).squeeze()
    final_binary = (final_preds > 0.5).int().cpu()
    final_accuracy = (final_binary == y_val_tensor.int()).float().mean().item()

print(f"\nFinal Validation Accuracy: {final_accuracy:.4f}")


Final Validation Accuracy: 0.7914


In [14]:
# Classify by horizon
_, df_val = train_test_split(df, test_size=0.2, random_state=42, stratify=df['c5'])

preds = model(X_val_tensor.to(device)).squeeze().cpu().detach().numpy()
binary_preds = (preds > 0.5).astype(int)

h1_mask = df_val['gameLength'].values == 5
h6_mask = df_val['gameLength'].values == 10

preds = model(X_val_tensor.to(device)).squeeze().cpu().detach().numpy()
binary_preds = (preds > 0.5).astype(int)

print("H1 Accuracy:", np.mean(binary_preds[h1_mask] == y_val[h1_mask]))
print("H6 Accuracy:", np.mean(binary_preds[h6_mask] == y_val[h6_mask]))


H1 Accuracy: 0.8443145589798087
H6 Accuracy: 0.7405515832482125
