In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, random_split
import numpy as np
import random
import pandas as pd

from chess_cnn import ChessCNN

LEARNING_RATE = 1e-3           # You may experiment with values like 5e-3 for faster learning
NUM_EPOCHS = 50
PATIENCE = 5
BATCH_SIZE = 512                # Configurable batch size

# --------------------------
# Data Preparation and Splitting (with batching)
# --------------------------

df = pd.read_parquet(r"C:\Users\forbe\OneDrive\Personal\Documents\repos\chess_data\df_features.parquet")

# Set seeds for reproducibility
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# Set up device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Convert features and labels from DataFrames.
# X: first 780 columns from df; y: the "result" column.
X = torch.tensor(df.iloc[:, :780].values, dtype=torch.float32).to(device)
y = torch.tensor(df["result"].values, dtype=torch.long).to(device)

# Create a TensorDataset
dataset = TensorDataset(X, y)
total_samples = len(dataset)

# Split dataset: 80% training, 10% validation, 10% testing.
train_size = int(0.8 * total_samples)
val_size = int(0.1 * total_samples)
test_size = total_samples - train_size - val_size

# Use a seeded generator for reproducibility in the random split
generator = torch.Generator().manual_seed(seed)
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size], generator=generator)

# Create DataLoaders for batching
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader  = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# --------------------------
# Model, Weight Initialization, and Optimizer
# --------------------------

model = ChessCNN().to(device)

# Weight initialization: Xavier uniform for Conv2d and Linear layers
def init_weights(m):
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        torch.nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            torch.nn.init.zeros_(m.bias)

model.apply(init_weights)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# --------------------------
# Training Loop with Early Stopping (with batching)
# --------------------------

best_val_loss = float('inf')
epochs_no_improve = 0

for epoch in range(NUM_EPOCHS):
    model.train()
    running_train_loss = 0.0
    total_train = 0
    
    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()
        outputs_train = model(batch_x)
        loss_train = criterion(outputs_train, batch_y)
        loss_train.backward()
        optimizer.step()
        running_train_loss += loss_train.item() * batch_x.size(0)
        total_train += batch_x.size(0)
        
    train_loss = running_train_loss / total_train

    # Evaluate on the validation set
    model.eval()
    running_val_loss = 0.0
    total_val = 0
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            outputs_val = model(batch_x)
            loss_val = criterion(outputs_val, batch_y)
            running_val_loss += loss_val.item() * batch_x.size(0)
            total_val += batch_x.size(0)
    val_loss = running_val_loss / total_val

    print(f"Epoch [{epoch+1}/{NUM_EPOCHS}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

    # Early stopping check
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_no_improve = 0
        best_model_state = model.state_dict()  # Save best model's state
    else:
        epochs_no_improve += 1

    if epochs_no_improve >= PATIENCE:
        print("Early stopping triggered")
        break

# Load the best model weights before testing
model.load_state_dict(best_model_state)

# --------------------------
# Testing (with batching)
# --------------------------

model.eval()
running_test_loss = 0.0
total_test = 0
with torch.no_grad():
    for batch_x, batch_y in test_loader:
        outputs_test = model(batch_x)
        loss_test = criterion(outputs_test, batch_y)
        running_test_loss += loss_test.item() * batch_x.size(0)
        total_test += batch_x.size(0)
        
test_loss = running_test_loss / total_test
print(f"Final Test Loss: {test_loss:.4f}")


Epoch [1/50], Train Loss: 0.9806, Val Loss: 0.9586
Epoch [2/50], Train Loss: 0.9539, Val Loss: 0.9465
Epoch [3/50], Train Loss: 0.9470, Val Loss: 0.9458
Epoch [4/50], Train Loss: 0.9434, Val Loss: 0.9425
Epoch [5/50], Train Loss: 0.9415, Val Loss: 0.9434
Epoch [6/50], Train Loss: 0.9395, Val Loss: 0.9392
Epoch [7/50], Train Loss: 0.9381, Val Loss: 0.9404
Epoch [8/50], Train Loss: 0.9361, Val Loss: 0.9396
Epoch [9/50], Train Loss: 0.9350, Val Loss: 0.9373
Epoch [10/50], Train Loss: 0.9336, Val Loss: 0.9412
Epoch [11/50], Train Loss: 0.9326, Val Loss: 0.9386
Epoch [12/50], Train Loss: 0.9313, Val Loss: 0.9346
Epoch [13/50], Train Loss: 0.9304, Val Loss: 0.9349
Epoch [14/50], Train Loss: 0.9295, Val Loss: 0.9350
Epoch [15/50], Train Loss: 0.9288, Val Loss: 0.9342
Epoch [16/50], Train Loss: 0.9280, Val Loss: 0.9348
Epoch [17/50], Train Loss: 0.9272, Val Loss: 0.9391
Epoch [18/50], Train Loss: 0.9264, Val Loss: 0.9331
Epoch [19/50], Train Loss: 0.9258, Val Loss: 0.9325
Epoch [20/50], Train 

In [45]:
torch.save(model.state_dict(), 'pth/chess_model.pth')

In [43]:
# Make sure the model is in evaluation mode
model.eval()

with torch.no_grad():
    logits = model(X)                # Forward pass: get raw logits
    probabilities = torch.softmax(logits, dim=1)  # Convert logits to probabilities
    # Compute weighted score: probability of win (class 2) minus probability of loss (class 0)
    weighted_score = probabilities[:, 2] - probabilities[:, 0]

print(weighted_score)


tensor([ 0.0765,  0.0813, -0.0924,  ...,  0.3564, -0.1260,  0.4368])


In [44]:
weighted_score

tensor([ 0.0765,  0.0813, -0.0924,  ...,  0.3564, -0.1260,  0.4368])