In [2]:
import sys
import sqlite3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from sklearn.metrics import log_loss, brier_score_loss, accuracy_score
from sklearn.calibration import calibration_curve
import torch
import warnings

# Add current directory to path
sys.path.insert(0, '.')

# Import custom modules
from features import TennisFeatureExtractor
from ml_models.neural_network import (
    SymmetricNeuralNetwork,
    NeuralNetworkTrainer,
    BaggingEnsemble
)

# Notebook-wide configuration: warning policy, plot styling, RNG seeds.
# NOTE(review): this filter hides *every* warning for the rest of the session
# (deprecations, numerical issues, ...); narrow it if any of those matter.
warnings.filterwarnings('ignore')
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 8)

# Seed the global RNGs so that Restart Kernel -> Run All reproduces the same
# bootstrap samples and weight initialisation.
# NOTE(review): assumes the project's trainer / bagging code draws from the
# numpy and torch global generators — confirm against ml_models.neural_network.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

print("‚úÖ Libraries loaded")
print(f"PyTorch version: {torch.__version__}")
print(f"Training started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

ModuleNotFoundError: No module named 'torch'

## 1. Load and Prepare Data

In [None]:
# Open the SQLite database holding the match history. The handle stays open
# for the rest of the notebook and is closed in the last cell.
conn = sqlite3.connect('tennis_data.db')

# One row per match with a known surface, played from 2020-01-01 up to (but not
# including) 2025-01-01, oldest first. `winner` is 1 when the winning player
# has the smaller id and 2 otherwise.
# NOTE(review): the date filter compares against 'YYYY-MM-DD' literals, so it
# presumably expects tournament_date stored as ISO-formatted text — confirm.
query = """
SELECT 
    m.match_id,
    m.tournament_date,
    m.surface,
    m.winner_id,
    m.loser_id,
    m.best_of,
    CASE WHEN m.winner_id < m.loser_id THEN 1 ELSE 2 END as winner
FROM matches m
WHERE m.tournament_date >= '2020-01-01'
    AND m.tournament_date < '2025-01-01'
    AND m.surface IS NOT NULL
ORDER BY m.tournament_date
"""

matches = pd.read_sql_query(sql=query, con=conn)

# Quick sanity summary of what was loaded.
match_dates = matches['tournament_date']
print(f"Total matches: {matches.shape[0]:,}")
print(f"Date range: {match_dates.min()} to {match_dates.max()}")

In [None]:
# Generate features: one feature dict per match, collected into a DataFrame.
#
# NOTE(review): this cell previously instantiated `MatchFeatures`, a name the
# notebook never imports or defines (NameError on a fresh kernel). The only
# class imported from `features` is `TennisFeatureExtractor`, so it is used
# here. This assumes it takes the database path and exposes
# `generate_features(...)` (and `close()`, called in the final cell) — confirm.
feature_gen = TennisFeatureExtractor('tennis_data.db')

print("Generating features...\n")

features_list = []
total_matches = len(matches)  # loop-invariant, used only for progress output

for idx, match in matches.iterrows():
    # Progress line every 500 rows (idx is the frame's index label).
    if idx % 500 == 0:
        print(f"Processing match {idx}/{total_matches}...")

    # Canonical ordering: player1 is always the smaller id, player2 the larger.
    # This matches the SQL `winner` column, which is 1 when the winner has the
    # smaller id, so the label does not depend on who actually won.
    player1_id = min(match['winner_id'], match['loser_id'])
    player2_id = max(match['winner_id'], match['loser_id'])

    features = feature_gen.generate_features(
        player1_id,
        player2_id,
        match['surface'],
        match_date=match['tournament_date']
    )

    # Carry the identifiers and the target alongside the generated features.
    features['match_id'] = match['match_id']
    features['tournament_date'] = match['tournament_date']
    features['winner'] = match['winner']

    features_list.append(features)

df_features = pd.DataFrame(features_list)

print(f"\n‚úÖ Generated features for {len(df_features)} matches")

In [None]:
# Chronological split keyed on the calendar year, taken as the first four
# characters of `tournament_date` (the column is kept on `df_features`).
df_features['year'] = df_features['tournament_date'].str.slice(0, 4)

match_year = df_features['year']
train_df = df_features.loc[match_year.isin(['2020', '2021'])].copy()
val_df = df_features.loc[match_year.eq('2022')].copy()
test_df = df_features.loc[match_year.isin(['2023', '2024'])].copy()

# Report the size of each partition.
print("Data split:")
print(f"  Training (2020-2021):   {train_df.shape[0]:,}")
print(f"  Validation (2022):      {val_df.shape[0]:,}")
print(f"  Test (2023-2024):       {test_df.shape[0]:,}")

## 2. Prepare Features (Exclude RANK, POINTS)

In [None]:
# Feature base names are discovered from the `player1_`-prefixed columns;
# the prefix text is removed to obtain the bare feature name.
all_cols = list(df_features.columns)
feature_cols = list(filter(lambda name: name.startswith('player1_'), all_cols))
feature_names = [name.replace('player1_', '') for name in feature_cols]

# RANK and POINTS are deliberately left out of the model inputs
# (the comparison is an exact, case-sensitive name match).
excluded_features = ['RANK', 'POINTS']
available_features = [name for name in feature_names
                      if name not in excluded_features]

print(f"Total features: {len(feature_names)}")
print(f"Excluded: {excluded_features}")
print(f"Available: {len(available_features)}")
print(f"\nFeatures: {available_features}")

## 3. Train Single Neural Network (Baseline)

In [None]:
print("Training single neural network...\n")

# Hyper-parameters of the baseline model, gathered in one place so they are
# easy to compare with the ensemble settings used later.
single_nn_params = dict(
    n_features=len(available_features),
    learning_rate=0.0004,
    momentum=0.55,
    weight_decay=0.002,
    patience=10,
    verbose=True,
)
single_nn = NeuralNetworkTrainer(**single_nn_params)

# Fit on 2020-2021 with 2022 as the validation set, for at most 100 epochs.
# NOTE(review): `patience` presumably drives early stopping on the validation
# set — confirm in NeuralNetworkTrainer.
single_nn.fit(train_df, val_df, available_features, max_epochs=100)

In [None]:
# Evaluate single model on the held-out test years.
# accuracy_score, log_loss and brier_score_loss are already imported in the
# notebook's first cell, so the duplicate mid-notebook import was removed.

# NOTE(review): assumes predict() returns a 1-D array of probabilities that
# player1 wins, aligned row-for-row with test_df — confirm.
test_pred_single = single_nn.predict(test_df, available_features)

# Binary target: 1 when `winner` == 1 (player1, the lower id, won), else 0.
test_actuals = (test_df['winner'] == 1).astype(int).values

# Accuracy uses the rounded probabilities as hard 0/1 calls; log loss and
# Brier score use the probabilities directly.
single_accuracy = accuracy_score(test_actuals, test_pred_single.round())
single_log_loss = log_loss(test_actuals, test_pred_single)
single_brier = brier_score_loss(test_actuals, test_pred_single)

print("\nSingle Neural Network Performance:")
print(f"  Accuracy:    {single_accuracy:.2%}")
print(f"  Log Loss:    {single_log_loss:.4f}")
print(f"  Brier Score: {single_brier:.4f}")

## 4. Train Ensemble with Bagging (20 Models)

In [None]:
# Train a bagged ensemble of 20 networks with the same optimiser settings as
# the single baseline model.
# NOTE(review): `train_nn_ensemble` is not imported or defined anywhere in the
# visible notebook (only SymmetricNeuralNetwork, NeuralNetworkTrainer and
# BaggingEnsemble are imported) — confirm where it comes from.
ensemble_config = dict(
    n_bags=20,
    learning_rate=0.0004,
    momentum=0.55,
    weight_decay=0.002,
    patience=10,
    max_epochs=100,
    verbose=False,  # Set to False to reduce output
)

# Returns the trained models plus a stats object; later cells read its
# 'train_losses' and 'val_losses' entries.
ensemble_models, ensemble_stats = train_nn_ensemble(
    train_df, val_df, available_features, **ensemble_config
)

## 5. Evaluate Ensemble

In [None]:
# Score the ensemble on the test set and print it side by side with the
# single-model baseline.
# NOTE(review): `predict_ensemble` is not imported or defined in the visible
# notebook — confirm where it comes from.
test_pred_ensemble = predict_ensemble(ensemble_models, test_df, available_features)

ensemble_hard_calls = test_pred_ensemble.round()
ensemble_accuracy = accuracy_score(test_actuals, ensemble_hard_calls)
ensemble_log_loss = log_loss(test_actuals, test_pred_ensemble)
ensemble_brier = brier_score_loss(test_actuals, test_pred_ensemble)

# Positive numbers mean the ensemble is better: accuracy is
# ensemble - single (in percentage points), the two losses are single - ensemble.
accuracy_gain = (ensemble_accuracy-single_accuracy)*100
log_loss_gain = single_log_loss-ensemble_log_loss
brier_gain = single_brier-ensemble_brier

banner = "=" * 70
print(banner)
print("ENSEMBLE VS SINGLE MODEL COMPARISON")
print(banner)
print(f"\n{'Metric':<20} {'Single NN':<15} {'Ensemble (20)':<15} {'Improvement'}")
print("-" * 70)
print(f"{'Accuracy':<20} {single_accuracy:<15.2%} {ensemble_accuracy:<15.2%} {accuracy_gain:+.2f}%")
print(f"{'Log Loss':<20} {single_log_loss:<15.4f} {ensemble_log_loss:<15.4f} {log_loss_gain:+.4f}")
print(f"{'Brier Score':<20} {single_brier:<15.4f} {ensemble_brier:<15.4f} {brier_gain:+.4f}")
print("\n" + banner)

## 6. Learning Curves

In [None]:
# Learning curves: single model on the left, every ensemble member plus the
# per-epoch average on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# --- Left panel: single model ---
single_history = single_nn.get_history()
epochs_single = range(1, len(single_history['train_loss']) + 1)
ax1.plot(epochs_single, single_history['train_loss'], 'b-', label='Training Loss', linewidth=2)
ax1.plot(epochs_single, single_history['val_loss'], 'r-', label='Validation Loss', linewidth=2)

# --- Right panel: one faint pair of lines per ensemble member ---
train_curves = ensemble_stats['train_losses']
val_curves = ensemble_stats['val_losses']
for train_losses, val_losses in zip(train_curves, val_curves):
    epochs = range(1, len(train_losses) + 1)
    ax2.plot(epochs, train_losses, 'b-', alpha=0.2, linewidth=1)
    ax2.plot(epochs, val_losses, 'r-', alpha=0.2, linewidth=1)

# Members can stop at different epochs, so each epoch's average only uses the
# members that actually reached it.
max_epochs = max(len(curve) for curve in train_curves)
avg_train = [
    np.mean([curve[epoch] for curve in train_curves if epoch < len(curve)])
    for epoch in range(max_epochs)
]
avg_val = [
    np.mean([curve[epoch] for curve in val_curves if epoch < len(curve)])
    for epoch in range(max_epochs)
]

epochs_avg = range(1, len(avg_train) + 1)
ax2.plot(epochs_avg, avg_train, 'b-', label='Avg Training Loss', linewidth=3)
ax2.plot(epochs_avg, avg_val, 'r-', label='Avg Validation Loss', linewidth=3)

# --- Shared axis styling (applied after plotting so the legends pick up the labels) ---
for panel, panel_title in (
    (ax1, 'Single Neural Network - Learning Curve'),
    (ax2, 'Ensemble (20 Models) - Learning Curves'),
):
    panel.set_xlabel('Epoch', fontsize=12)
    panel.set_ylabel('Loss (Binary Cross-Entropy)', fontsize=12)
    panel.set_title(panel_title, fontsize=14, fontweight='bold')
    panel.legend(fontsize=11)
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('nn_learning_curves.png', dpi=300, bbox_inches='tight')
plt.show()

print("‚úÖ Learning curves saved: nn_learning_curves.png")

## 7. Feature Importance (Permutation)

In [None]:
# Calculate feature importance
# Permutation importance of each input feature, computed for the ensemble on
# the validation set (val_df) with 5 repeats.
# NOTE(review): `calculate_permutation_importance` is not imported or defined
# in the visible notebook — confirm where it comes from.
# The result is used below as a DataFrame with 'feature', 'importance' and
# 'std' columns (see the plotting cell that follows).
importance_df = calculate_permutation_importance(
    ensemble_models,
    val_df,
    available_features,
    n_repeats=5
)

# Plain-text dump of the full importance table, without the index column.
print("\nFeature Importance (Permutation):")
print(importance_df.to_string(index=False))

In [None]:
# Horizontal bar chart of permutation importance, one bar per feature, with
# the spread across repeats drawn as horizontal error bars.
fig, ax = plt.subplots(figsize=(12, 8))

feature_labels = importance_df['feature']
importance_values = importance_df['importance']
importance_spread = importance_df['std']

# One colour per row, sampled evenly from the middle of the RdYlGn colormap.
colors = plt.cm.RdYlGn(np.linspace(0.2, 0.8, len(importance_df)))
bars = ax.barh(
    feature_labels,
    importance_values,
    color=colors,
    alpha=0.7,
    edgecolor='black',
)

# Error bars only (fmt='none' suppresses the marker and connecting line).
ax.errorbar(
    importance_values,
    feature_labels,
    xerr=importance_spread,
    fmt='none',
    ecolor='black',
    capsize=3,
    alpha=0.5,
)

ax.set_xlabel('Importance (Increase in Log-Loss)', fontsize=12, fontweight='bold')
ax.set_title('Neural Network Feature Importance\n(via Permutation)',
             fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3, axis='x')

plt.tight_layout()
plt.savefig('nn_feature_importance.png', dpi=300, bbox_inches='tight')
plt.show()

print("‚úÖ Feature importance plot saved: nn_feature_importance.png")

## 8. Prediction Distribution & Calibration

In [None]:
# Calibration analysis: bucket the ensemble's test predictions into ten
# equal-width probability bins and compare the mean prediction with the
# observed win rate in each bin.
bins = np.linspace(0, 1, 11)
bin_centers = (bins[:-1] + bins[1:]) / 2

# digitize() is 1-based and sends a prediction of exactly 1.0 past the last
# bin, hence the shift and the clip back into [0, n_bins - 1].
bin_indices = np.clip(np.digitize(test_pred_ensemble, bins) - 1,
                      0, len(bin_centers) - 1)

calibration_data = []
for bin_id in range(len(bin_centers)):
    in_bin = bin_indices == bin_id
    n_in_bin = in_bin.sum()
    if n_in_bin == 0:
        continue  # empty bins contribute no point to the curve
    calibration_data.append({
        'predicted': test_pred_ensemble[in_bin].mean(),
        'actual': test_actuals[in_bin].mean(),
        'count': n_in_bin
    })

calib_df = pd.DataFrame(calibration_data)

# Plot
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Left: reliability diagram, marker area proportional to the bin population.
mean_predicted = calib_df['predicted']
observed_rate = calib_df['actual']
ax1.plot([0, 1], [0, 1], 'r--', linewidth=2, label='Perfect Calibration')
ax1.scatter(mean_predicted, observed_rate,
            s=calib_df['count']*2, alpha=0.6, color='blue')
ax1.plot(mean_predicted, observed_rate, 'b-', linewidth=1, alpha=0.5)

ax1.set_xlabel('Predicted Probability', fontsize=12)
ax1.set_ylabel('Actual Win Rate', fontsize=12)
ax1.set_title('Calibration Curve\n(size = number of matches)', fontsize=14, fontweight='bold')
ax1.legend(fontsize=10)
ax1.grid(True, alpha=0.3)
ax1.set_xlim(0, 1)
ax1.set_ylim(0, 1)

# Right: histogram of the predicted probabilities with the 0.5 decision line.
ax2.hist(test_pred_ensemble, bins=30, alpha=0.7, color='blue', edgecolor='black')
ax2.axvline(x=0.5, color='red', linestyle='--', linewidth=2, label='50% threshold')
ax2.set_xlabel('Predicted Probability', fontsize=12)
ax2.set_ylabel('Frequency', fontsize=12)
ax2.set_title('Ensemble Prediction Distribution', fontsize=14, fontweight='bold')
ax2.legend(fontsize=10)
ax2.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig('nn_calibration.png', dpi=300, bbox_inches='tight')
plt.show()

print("‚úÖ Calibration plot saved: nn_calibration.png")

# Unweighted mean of the per-bin |predicted - actual| gap: every non-empty bin
# counts equally, regardless of how many matches it holds.
calib_error = (mean_predicted - observed_rate).abs().mean()
print(f"\nMean Calibration Error: {calib_error:.4f}")

## 9. Performance by Surface

In [None]:
# Get surface information for each test row.
#
# The predictions and labels are plain positional arrays aligned with test_df,
# so the surface lookup must keep test_df's row count and order. A per-row map
# guarantees that. An inner merge could drop or duplicate rows, and would
# produce `surface_x`/`surface_y` columns if test_df already carried a
# `surface` column.
surface_by_match = (
    matches.drop_duplicates('match_id').set_index('match_id')['surface']
)
test_with_surface = test_df.assign(
    surface=test_df['match_id'].map(surface_by_match)
)
assert len(test_with_surface) == len(test_pred_ensemble), \
    "surface lookup is no longer aligned with the ensemble predictions"

surface_results = []

for surface in ['Hard', 'Clay', 'Grass']:
    # Plain numpy boolean mask so that indexing the numpy prediction/label
    # arrays is purely positional.
    mask = (test_with_surface['surface'] == surface).to_numpy()
    if not mask.any():
        continue

    surface_probs = test_pred_ensemble[mask]
    surface_actuals = test_actuals[mask]

    surface_results.append({
        'Surface': surface,
        'Matches': int(mask.sum()),
        'Accuracy': accuracy_score(surface_actuals, surface_probs.round()),
        # labels=[0, 1] keeps log_loss defined even when a surface subset
        # happens to contain only one outcome class.
        'Log Loss': log_loss(surface_actuals, surface_probs, labels=[0, 1])
    })

surface_df = pd.DataFrame(surface_results)

print("\nPerformance by Surface:")
print(surface_df.to_string(index=False))

## 10. Save Ensemble Model

In [None]:
import pickle

# Bundle everything needed to reuse or audit the ensemble: the trained models,
# the feature list they expect, the training statistics, the headline test
# metrics and the permutation-importance table.
test_metrics = {
    'accuracy': ensemble_accuracy,
    'log_loss': ensemble_log_loss,
    'brier_score': ensemble_brier,
    'calibration_error': calib_error
}
ensemble_data = {
    'models': ensemble_models,
    'features': available_features,
    'ensemble_stats': ensemble_stats,
    'test_metrics': test_metrics,
    'feature_importance': importance_df
}

# Serialise with pickle. Only unpickle this file from a trusted source, since
# loading a pickle can execute arbitrary code.
with open('ml_models/nn_ensemble.pkl', mode='wb') as out_file:
    pickle.dump(ensemble_data, out_file)

print("‚úÖ Ensemble saved: ml_models/nn_ensemble.pkl")

## 11. Final Summary

In [None]:
# Final report, assembled as a list of lines and printed in order.
# NOTE(review): the architecture and training-configuration lines are fixed
# text, not values read back from the models — keep them in sync by hand.
rule = "=" * 80
report_lines = [
    rule,
    "NEURAL NETWORK ENSEMBLE - FINAL REPORT",
    rule,
    f"\nüìÖ Training completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
    "\nüèóÔ∏è  Architecture:",
    f"  Input features: {len(available_features)}",
    "  Hidden neurons: 100 (tanh activation)",
    "  Output: 1 (sigmoid activation)",
    "  Bias terms: None (symmetric design)",
    "\nüéØ Training Configuration:",
    "  Optimizer: SGD (momentum=0.55)",
    "  Learning rate: 0.0004",
    "  Weight decay: 0.002",
    "  Batch size: 1 (online learning)",
    "  Early stopping: patience=10",
    "\nüé≤ Ensemble:",
    f"  Number of models: {len(ensemble_models)}",
    "  Bagging: Bootstrap sampling",
    "  Prediction: Average of all models",
    "\nüìä Test Set Performance (2023-2024):",
    f"  Test samples: {len(test_df):,}",
    f"  Accuracy:      {ensemble_accuracy:.2%}",
    f"  Log Loss:      {ensemble_log_loss:.4f}",
    f"  Brier Score:   {ensemble_brier:.4f}",
    f"  Calibration:   {calib_error:.4f}",
    # Positive values mean the ensemble beat the single model.
    "\nüìà Improvement over Single Model:",
    f"  Accuracy:      {(ensemble_accuracy-single_accuracy)*100:+.2f}%",
    f"  Log Loss:      {(single_log_loss-ensemble_log_loss):+.4f}",
    f"  Brier Score:   {(single_brier-ensemble_brier):+.4f}",
    "\nüìÅ Files Generated:",
    "  ‚úÖ nn_learning_curves.png",
    "  ‚úÖ nn_feature_importance.png",
    "  ‚úÖ nn_calibration.png",
    "  ‚úÖ ml_models/nn_ensemble.pkl",
    "\n" + rule,
]

for report_line in report_lines:
    print(report_line)

In [None]:
# Close connections
# Release the SQLite handle opened when the matches were loaded.
conn.close()
# NOTE(review): `feature_gen` is created in the feature-generation cell; this
# assumes its class exposes a close() method — confirm.
feature_gen.close()
print("\n‚úÖ Database connections closed")