In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.calibration import calibration_curve
from sklearn.calibration import CalibratedClassifierCV
import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, log_loss, confusion_matrix, classification_report

import xgboost as xgb

import time

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn

In [3]:
df = pd.read_csv("final2.csv")
df.head()

Unnamed: 0,team_size,team_A_avg_mmr,team_B_avg_mmr,team_A_civs,team_B_civs,map_name,team_A_won,civ_A_vs_B_winrate,civ_A_vs_B_certainty,mmr_gap
0,1,2294.0,2297.0,french,house_of_lancaster,Dry Arabia,0,42.993025,0.503591,-3.0
1,1,2290.0,2165.0,mongols,chinese,Gorge,1,53.023663,0.112949,125.0
2,1,2282.0,2173.0,japanese,chinese,Carmel,1,59.821429,0.135038,109.0
3,1,1958.0,2279.0,ottomans,japanese,Lipany,0,44.265081,0.116866,-321.0
4,1,1632.0,2279.0,holy_roman_empire,english,Dry Arabia,0,51.759944,0.607073,-647.0


In [4]:
X = df.drop(columns=["team_A_won"])
y = df["team_A_won"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

categorical_cols = ['team_A_civs', 'team_B_civs', 'map_name']

# Define numerical columns (all others except target)
numerical_cols = [col for col in X_train.columns if col not in categorical_cols]

# Define preprocessor
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ]
)

**First Predictor - Logistic Regression**


In [6]:
def train_and_evaluate(model, X_train, X_test, y_train, y_test,
                       categorical_features, numeric_features):
    """
    Trains and evaluates a model using a pipeline. Times the process.

    Returns: dict with accuracy, f1, log_loss, runtime
    """
    # Preprocessor: scale numerics, one-hot encode categoricals
    preprocessor = ColumnTransformer(transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

    # Full pipeline: preprocessing + model
    pipe = Pipeline([
        ('preprocess', preprocessor),
        ('model', model)
    ])

    
    start_time = time.time()
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    y_prob = pipe.predict_proba(X_test)[:, 1]  # For log_loss
    runtime = time.time() - start_time
    
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    loss = log_loss(y_test, y_prob)

    return {
        'accuracy': acc,
        'f1_score': f1,
        'log_loss': loss,
        'runtime': runtime,
        'model': model.__class__.__name__
    }
    


**Second model: XGBoost**

In [7]:
def train_evaluate_xgboost(X_train, X_test, y_train, y_test):
    start_time = time.time()

    pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('classifier', xgb.XGBClassifier(
            n_estimators=1500,
            learning_rate=0.1,
            max_depth=4,
            use_label_encoder=False,
            eval_metric='logloss',
            random_state=42
        ))
    ])
    pipeline.fit(X_train, y_train)
    preds = pipeline.predict(X_test)

    end_time = time.time()
    runtime = end_time - start_time

    acc = accuracy_score(y_test, preds)
    cm = confusion_matrix(y_test, preds)
    report = classification_report(y_test, preds)

    return {
        'accuracy': acc,
        "confusion_matrix":cm,
        'runtime': runtime,
        'model': "XGBoost"
    }

**Third Model: Neural Network**

In [13]:
def train_and_evaluate_nn(X_train_raw, X_test_raw, y_train, y_test,
                          categorical_features, numeric_features,
                          input_dim=None, epochs=50, batch_size=32):
    """
    Trains and evaluates a PyTorch neural network model.
    Returns: dict with accuracy, f1, log_loss, runtime
    """
    

    X_train = preprocessor.fit_transform(X_train_raw)
    X_test = preprocessor.transform(X_test_raw)

    # ==== PyTorch Dataset ====
    class AoeDataset(Dataset):
        def __init__(self, X, y):
            self.X = torch.tensor(X.toarray() if hasattr(X, "toarray") else X, dtype=torch.float32)
            self.y = torch.tensor(y.to_numpy().reshape(-1, 1), dtype=torch.float32)

        def __len__(self): return len(self.y)
        def __getitem__(self, idx): return self.X[idx], self.y[idx]

    train_dataset = AoeDataset(X_train, y_train)
    test_dataset = AoeDataset(X_test, y_test)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    # ==== Model ====
    class AoeNet(nn.Module):
        def __init__(self, input_dim):
            super().__init__()
            self.net = nn.Sequential(
                nn.Linear(input_dim, 128),
                nn.ReLU(),
                nn.BatchNorm1d(128),  # helps with training stability

                nn.Dropout(0.3),
                nn.Linear(128, 64),
                nn.ReLU(),

                nn.Dropout(0.3),
                nn.Linear(64, 32),
                nn.ReLU(),

                nn.Dropout(0.3),
                nn.Linear(32, 1),
                nn.Sigmoid()
        )

        def forward(self, x): return self.net(x)

    def init_weights(m):
        if isinstance(m, nn.Linear):
            nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
            nn.init.zeros_(m.bias)
        
    model = AoeNet(input_dim or X_train.shape[1])
    model.apply(init_weights)
    loss_fn = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # ==== Training ====
    start_time = time.time()
    model.train()
    for epoch in range(epochs):
        for X_batch, y_batch in train_loader:
            preds = model(X_batch)
            loss = loss_fn(preds, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    runtime = time.time() - start_time

    # ==== Evaluation ====
    model.eval()
    y_probs, y_true = [], []
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch)
            y_probs.extend(outputs.squeeze().numpy())
            y_true.extend(y_batch.squeeze().numpy())

    y_preds = [1 if p >= 0.5 else 0 for p in y_probs]

    # ==== Metrics ====
    acc = accuracy_score(y_true, y_preds)
    f1 = f1_score(y_true, y_preds)
    loss = log_loss(y_true, y_probs)

    return {
        'accuracy': acc,
        'f1_score': f1,
        'log_loss': loss,
        'runtime': runtime,
        'model': "PyTorchNN"
    }


In [12]:
logreg = LogisticRegression(solver='lbfgs', max_iter=50)

log_results = train_and_evaluate(logreg, X_train, X_test, y_train, y_test,
                             categorical_cols, numerical_cols)
print(log_results)
    
xgb_results = train_evaluate_xgboost(X_train, X_test, y_train, y_test)
print(xgb_results)

nn_results = train_and_evaluate_nn(
    X_train_raw=X_train,
    X_test_raw=X_test,
    y_train=y_train,
    y_test=y_test,
    categorical_features=categorical_cols,
    numeric_features=numerical_cols
)

print(nn_results)

{'accuracy': 0.7515644555694618, 'f1_score': 0.6826538768984812, 'log_loss': 0.4921482479313774, 'runtime': 0.062001705169677734, 'model': 'LogisticRegression'}


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


{'accuracy': 0.9974968710888611, 'confusion_matrix': array([[897,   0],
       [  4, 697]], dtype=int64), 'runtime': 1.2619960308074951, 'model': 'XGBoost'}
{'accuracy': 0.9036295369211514, 'f1_score': 0.8884057971014493, 'log_loss': 0.22601403286253133, 'runtime': 23.34657645225525, 'model': 'PyTorchNN'}


So far we only worked with binary classifiers, now we need to adapt the models so that they can also predict a probability and not just the winner.

In [14]:
def train_evaluate_xgboost_calibrated(X_train, X_test, y_train, y_test, preprocessor, plot_curve=True):
    start_time = time.time()

    # 🧹 Preprocess (fit only on train)
    X_train_transformed = preprocessor.fit_transform(X_train)
    X_test_transformed = preprocessor.transform(X_test)

    # 📦 Base XGBoost model with conservative settings
    base_model = xgb.XGBClassifier(
        n_estimators=500,
        learning_rate=0.05,
        max_depth=3,
        use_label_encoder=False,
        eval_metric='logloss',
        random_state=42
    )

    # 🔄 Wrap in calibrated classifier (sigmoid is smoother)
    calibrated = CalibratedClassifierCV(estimator=base_model, method='sigmoid', cv=5)
    calibrated.fit(X_train_transformed, y_train)

    # 🔮 Predictions
    y_prob = calibrated.predict_proba(X_test_transformed)[:, 1]
    y_pred = calibrated.predict(X_test_transformed)

    end_time = time.time()
    runtime = end_time - start_time

    # 📊 Metrics
    acc = accuracy_score(y_test, y_pred)
    loss = log_loss(y_test, y_prob)
    cm = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    # 📈 Calibration curve
    if plot_curve:
        prob_true, prob_pred = calibration_curve(y_test, y_prob, n_bins=10)
        plt.figure(figsize=(6, 6))
        plt.plot(prob_pred, prob_true, marker='o', label='XGBoost (Calibrated)')
        plt.plot([0, 1], [0, 1], linestyle='--', color='gray', label='Perfect Calibration')
        plt.title("Calibration Curve")
        plt.xlabel("Predicted Probability")
        plt.ylabel("Empirical Probability")
        plt.legend()
        plt.grid(True)
        plt.show()

    # 📉 Confidence Score: avg distance from 0.5
    avg_confidence = float(np.mean(np.abs(y_prob - 0.5)))  # Range: 0 (totally uncertain) to 0.5 (max confident)

    return {
        'accuracy': acc,
        'log_loss': loss,
        'confusion_matrix': cm,
        'classification_report': report,
        'runtime': runtime,
        'average_confidence': avg_confidence,
        'model': "XGBoost (Calibrated)",
        'y_pred': y_pred,
        'y_prob': y_prob,
        'y_true': y_test
    }
