In [2]:
import sys
import sqlite3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import pickle
import warnings
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler

# Import custom modules
from features import TennisFeatureExtractor

warnings.filterwarnings('ignore')
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 8)

print("‚úÖ Libraries loaded")
print(f"Backtesting started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

‚úÖ Libraries loaded
Backtesting started at: 2025-12-28 04:54:20


## 1. Load Test Data (2023-2024)

In [3]:
# Connect to database
conn = sqlite3.connect('tennis_data.db')

# Load test matches (2023-2024)
query = """
SELECT 
    m.match_id,
    m.tournament_date,
    m.surface,
    m.winner_id,
    m.loser_id,
    m.best_of
FROM matches m
WHERE m.tournament_date >= '2023-01-01'
    AND m.tournament_date < '2025-01-01'
    AND m.surface IS NOT NULL
ORDER BY m.tournament_date
"""

test_matches = pd.read_sql_query(query, conn)

print(f"Test matches: {len(test_matches):,}")
print(f"Date range: {test_matches['tournament_date'].min()} to {test_matches['tournament_date'].max()}")

Test matches: 6,001
Date range: 2023-01-02 to 2024-12-18


In [5]:
# Load trained models
print("Loading trained models...")

# Load logistic regression model
with open('ml_models/logistic_regression_trained.pkl', 'rb') as f:
    lr_data = pickle.load(f)
    lr_model = lr_data['model']
    lr_features = lr_data['selected_features']

print(f"‚úÖ Logistic Regression: {len(lr_features)} features")
print(f"   Features: {lr_features}")

# Load neural network ensemble
with open('ml_models/neural_network_ensemble.pkl', 'rb') as f:
    nn_data = pickle.load(f)
    nn_scaler = nn_data['scaler']
    nn_features = nn_data['features']
    nn_hidden_dim = nn_data['hidden_dim']
    nn_model_states = nn_data['models']

# Recreate neural network architecture
class SymmetricNeuralNetwork(nn.Module):
    def __init__(self, input_dim, hidden_dim=100):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim, bias=False)
        self.fc2 = nn.Linear(hidden_dim, 1, bias=False)
    
    def forward(self, x):
        x = torch.tanh(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x.squeeze()

# Load ensemble models
nn_models = []
for state_dict in nn_model_states:
    model = SymmetricNeuralNetwork(len(nn_features), nn_hidden_dim)
    model.load_state_dict(state_dict)
    model.eval()
    nn_models.append(model)

print(f"‚úÖ Neural Network Ensemble: {len(nn_models)} models, {len(nn_features)} features")

Loading trained models...
‚úÖ Logistic Regression: 9 features
   Features: ['WIN_RATE_DIFF', 'DIRECT_H2H', 'SECOND_SERVE_WIN_PCT_DIFF', 'SURFACE_EXP_DIFF', 'COMPLETE_DIFF', 'BP_SAVE_DIFF', 'FIRST_SERVE_WIN_PCT_DIFF', 'FIRST_SERVE_PCT_DIFF', 'SURFACE_WIN_RATE_DIFF']
‚úÖ Neural Network Ensemble: 20 models, 17 features


## 2. Generate Simulated Odds

Since we don't have historical odds data, we'll simulate realistic odds based on statistical distributions.

In [7]:
# Extract features for test matches
print("Extracting features for test matches...")

feature_extractor = TennisFeatureExtractor('tennis_data.db')

# Get matches for feature extraction - use simpler approach
# Use the same approach as in the model training notebooks
test_query = """
    SELECT 
        m.match_id,
        m.tournament_date,
        m.surface,
        m.winner_id,
        m.loser_id,
        m.best_of,
        pw.player_name as winner_name,
        pl.player_name as loser_name
    FROM matches m
    LEFT JOIN players pw ON m.winner_id = pw.player_id
    LEFT JOIN players pl ON m.loser_id = pl.player_id
    WHERE m.tournament_date >= '2023-01-01'
        AND m.tournament_date < '2025-01-01'
        AND m.surface IS NOT NULL
    ORDER BY m.tournament_date
    LIMIT 2000
"""

test_matches_subset = pd.read_sql_query(test_query, conn)
print(f"Processing {len(test_matches_subset)} test matches...")

Extracting features for test matches...
Processing 2000 test matches...


## 3. Load Trained Models

In [8]:
# Extract features for each test match
print("Extracting features for backtesting...")

feature_rows = []
for idx, match in test_matches_subset.iterrows():
    if idx % 200 == 0:
        print(f"  Processing {idx}/{len(test_matches_subset)}...")
    
    try:
        features = feature_extractor.extract_features(
            match_id=match['match_id'],
            match_date=match['tournament_date']
        )
        features['actual_winner'] = 1  # Winner is always player 1 by construction
        feature_rows.append(features)
    except Exception as e:
        pass  # Skip matches with missing data

df_test_features = pd.DataFrame(feature_rows)
print(f"\n‚úÖ Extracted features for {len(df_test_features)} matches")

Extracting features for backtesting...
  Processing 0/2000...
  Processing 200/2000...
  Processing 400/2000...
  Processing 600/2000...
  Processing 800/2000...
  Processing 1000/2000...
  Processing 1200/2000...
  Processing 1400/2000...
  Processing 1600/2000...
  Processing 1800/2000...

‚úÖ Extracted features for 2000 matches


## 4. Generate Predictions from All Models

In [9]:
# Generate predictions from all models
print("=" * 60)
print("GENERATING PREDICTIONS FROM ALL MODELS")
print("=" * 60)

# Get feature matrices
def prepare_lr_features(df, feature_cols):
    """Prepare features for logistic regression."""
    X = df[feature_cols].values
    return X

def prepare_nn_features(df, feature_cols, scaler):
    """Prepare features for neural network."""
    X = df[feature_cols].values
    X_scaled = scaler.transform(X)
    return torch.FloatTensor(X_scaled)

def ensemble_predict(models, X_tensor):
    """Get ensemble predictions."""
    all_preds = []
    for model in models:
        model.eval()
        with torch.no_grad():
            preds = model(X_tensor).numpy()
            all_preds.append(preds)
    return np.mean(all_preds, axis=0)

# Logistic Regression predictions
print("\n1. Logistic Regression predictions...")
X_lr = prepare_lr_features(df_test_features, lr_features)
lr_probs = lr_model.predict_proba(X_lr)[:, 1]
print(f"   Mean prediction: {lr_probs.mean():.3f}")
print(f"   Std: {lr_probs.std():.3f}")

# Neural Network predictions
print("\n2. Neural Network predictions...")
X_nn = prepare_nn_features(df_test_features, nn_features, nn_scaler)
nn_probs = ensemble_predict(nn_models, X_nn)
print(f"   Mean prediction: {nn_probs.mean():.3f}")
print(f"   Std: {nn_probs.std():.3f}")

# Meta-Ensemble (weighted average)
print("\n3. Meta-Ensemble predictions...")
meta_weights = {'lr': 0.5, 'nn': 0.5}
meta_probs = meta_weights['lr'] * lr_probs + meta_weights['nn'] * nn_probs
print(f"   Mean prediction: {meta_probs.mean():.3f}")
print(f"   Std: {meta_probs.std():.3f}")

GENERATING PREDICTIONS FROM ALL MODELS

1. Logistic Regression predictions...


AttributeError: 'numpy.ndarray' object has no attribute 'columns'

## 5. Backtest Each Model with Kelly Strategy

In [None]:
# Backtest all models
initial_bankroll = 1000.0
strategy = 'kelly'

backtest_results = {}

for model_name in ['markov', 'logistic', 'neural_net', 'ensemble']:
    if model_name not in predictions_df.columns:
        continue
    
    print(f"\nBacktesting {model_name.upper()}...")
    
    # Prepare predictions dataframe
    model_preds = pd.DataFrame({
        'match_id': predictions_df['match_id'],
        'p_player1_win': predictions_df[model_name],
        'actual_winner': predictions_df['actual_winner']
    })
    
    # Backtest
    result = backtest_model(
        model_preds,
        odds_df,
        model_name=model_name,
        strategy=strategy,
        initial_bankroll=initial_bankroll
    )
    
    backtest_results[model_name] = result
    
    print(f"  ROI: {result['roi']:+.2%}")
    print(f"  Final Bankroll: ${result['final_bankroll']:.2f}")
    print(f"  Total Profit: ${result['total_profit']:+.2f}")
    print(f"  Num Bets: {result['num_bets']}")
    print(f"  Win Rate: {result['win_rate']:.2%}")
    print(f"  Sharpe Ratio: {result['sharpe_ratio']:.2f}")
    print(f"  Max Drawdown: {result['max_drawdown']:.2%}")

## 6. Comparison Table

In [None]:
# Create comparison table
comparison_data = []

for model_name, result in backtest_results.items():
    comparison_data.append({
        'Model': model_name.upper(),
        'ROI': f"{result['roi']:+.2%}",
        'Final Bankroll': f"${result['final_bankroll']:.2f}",
        'Total Profit': f"${result['total_profit']:+.2f}",
        'Num Bets': result['num_bets'],
        'Win Rate': f"{result['win_rate']:.2%}",
        'Avg Odds': f"{result['avg_odds']:.2f}",
        'Sharpe Ratio': f"{result['sharpe_ratio']:.2f}",
        'Max Drawdown': f"{result['max_drawdown']:.2%}"
    })

comparison_df = pd.DataFrame(comparison_data)

print("\n" + "=" * 100)
print("MODEL COMPARISON - KELLY CRITERION STRATEGY")
print("=" * 100)
print(comparison_df.to_string(index=False))
print("=" * 100)

## 7. Bankroll Evolution Plots

In [None]:
# Plot bankroll evolution for each model
for model_name, result in backtest_results.items():
    if len(result['bets_df']) > 0:
        plot_bankroll_evolution(
            result['bets_df'],
            initial_bankroll=initial_bankroll,
            title=f"{model_name.upper()} - Bankroll Evolution (Kelly Criterion)",
            save_path=f"backtesting_{model_name}_bankroll.png"
        )

## 8. Month-by-Month P&L Analysis

In [None]:
# Add dates to bets dataframes
for model_name, result in backtest_results.items():
    if len(result['bets_df']) > 0:
        # Merge with test matches to get dates
        bets_with_dates = result['bets_df'].merge(
            test_matches[['match_id', 'tournament_date']],
            on='match_id'
        )
        
        # Plot monthly P&L
        plot_monthly_pnl(
            bets_with_dates,
            date_column='tournament_date',
            title=f"{model_name.upper()} - Monthly P&L",
            save_path=f"backtesting_{model_name}_monthly_pnl.png"
        )

## 9. Strategy Comparison (Best Model)

In [None]:
# Find best model
best_model = max(backtest_results.items(), key=lambda x: x[1]['roi'])
best_model_name = best_model[0]

print(f"\nBest Model: {best_model_name.upper()}")
print(f"ROI: {best_model[1]['roi']:+.2%}\n")

# Compare strategies for best model
best_model_preds = pd.DataFrame({
    'match_id': predictions_df['match_id'],
    'p_player1_win': predictions_df[best_model_name],
    'actual_winner': predictions_df['actual_winner']
})

strategy_comparison = compare_strategies(
    best_model_preds,
    odds_df,
    strategies=['fixed', 'value', 'kelly'],
    initial_bankroll=initial_bankroll
)

print(f"Strategy Comparison for {best_model_name.upper()}:")
print(strategy_comparison.to_string(index=False))

## 10. Performance Visualization

In [None]:
# Visualize model comparison
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

model_names = list(backtest_results.keys())
colors = plt.cm.Set3(np.linspace(0, 1, len(model_names)))

# Extract metrics
rois = [backtest_results[m]['roi'] * 100 for m in model_names]
sharpes = [backtest_results[m]['sharpe_ratio'] for m in model_names]
drawdowns = [backtest_results[m]['max_drawdown'] * 100 for m in model_names]
win_rates = [backtest_results[m]['win_rate'] * 100 for m in model_names]

# Plot 1: ROI
bars1 = axes[0, 0].bar([m.upper() for m in model_names], rois, color=colors, alpha=0.7, edgecolor='black')
axes[0, 0].axhline(y=0, color='red', linestyle='--', linewidth=1)
axes[0, 0].set_ylabel('ROI (%)', fontsize=12)
axes[0, 0].set_title('Return on Investment', fontsize=13, fontweight='bold')
axes[0, 0].grid(True, alpha=0.3, axis='y')
for bar, roi in zip(bars1, rois):
    height = bar.get_height()
    axes[0, 0].text(bar.get_x() + bar.get_width()/2., height,
                   f'{roi:+.1f}%', ha='center', va='bottom' if roi >= 0 else 'top', fontweight='bold')

# Plot 2: Sharpe Ratio
bars2 = axes[0, 1].bar([m.upper() for m in model_names], sharpes, color=colors, alpha=0.7, edgecolor='black')
axes[0, 1].set_ylabel('Sharpe Ratio', fontsize=12)
axes[0, 1].set_title('Risk-Adjusted Returns', fontsize=13, fontweight='bold')
axes[0, 1].grid(True, alpha=0.3, axis='y')
for bar, sharpe in zip(bars2, sharpes):
    height = bar.get_height()
    axes[0, 1].text(bar.get_x() + bar.get_width()/2., height,
                   f'{sharpe:.2f}', ha='center', va='bottom', fontweight='bold')

# Plot 3: Max Drawdown
bars3 = axes[1, 0].bar([m.upper() for m in model_names], drawdowns, color=colors, alpha=0.7, edgecolor='black')
axes[1, 0].set_ylabel('Max Drawdown (%)', fontsize=12)
axes[1, 0].set_title('Maximum Drawdown', fontsize=13, fontweight='bold')
axes[1, 0].grid(True, alpha=0.3, axis='y')
for bar, dd in zip(bars3, drawdowns):
    height = bar.get_height()
    axes[1, 0].text(bar.get_x() + bar.get_width()/2., height,
                   f'{dd:.1f}%', ha='center', va='bottom', fontweight='bold')

# Plot 4: Win Rate
bars4 = axes[1, 1].bar([m.upper() for m in model_names], win_rates, color=colors, alpha=0.7, edgecolor='black')
axes[1, 1].axhline(y=50, color='red', linestyle='--', linewidth=1, label='Break-even')
axes[1, 1].set_ylabel('Win Rate (%)', fontsize=12)
axes[1, 1].set_title('Betting Win Rate', fontsize=13, fontweight='bold')
axes[1, 1].grid(True, alpha=0.3, axis='y')
axes[1, 1].legend()
for bar, wr in zip(bars4, win_rates):
    height = bar.get_height()
    axes[1, 1].text(bar.get_x() + bar.get_width()/2., height,
                   f'{wr:.1f}%', ha='center', va='bottom', fontweight='bold')

plt.tight_layout()
plt.savefig('backtesting_model_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

print("‚úÖ Model comparison plot saved: backtesting_model_comparison.png")

## 11. Final Summary Report

In [None]:
print("\n" + "=" * 100)
print("BACKTESTING SUMMARY REPORT")
print("=" * 100)
print(f"\nüìÖ Report Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"\nüìä Test Period: {test_matches['tournament_date'].min()} to {test_matches['tournament_date'].max()}")
print(f"Test Matches: {len(test_matches):,}")
print(f"\nüí∞ Initial Bankroll: ${initial_bankroll:.2f}")
print(f"Strategy: {strategy.upper()} (Kelly Criterion with 25% fractional sizing)")
print(f"\nüèÜ Best Performing Model: {best_model_name.upper()}")
print(f"  ROI: {best_model[1]['roi']:+.2%}")
print(f"  Final Bankroll: ${best_model[1]['final_bankroll']:.2f}")
print(f"  Total Profit: ${best_model[1]['total_profit']:+.2f}")
print(f"  Sharpe Ratio: {best_model[1]['sharpe_ratio']:.2f}")
print(f"  Max Drawdown: {best_model[1]['max_drawdown']:.2%}")
print(f"  Win Rate: {best_model[1]['win_rate']:.2%}")
print(f"  Number of Bets: {best_model[1]['num_bets']}")
print(f"\nüìà All Models Performance:")
print(comparison_df.to_string(index=False))
print(f"\nüìÅ Files Generated:")
for model_name in backtest_results.keys():
    print(f"  ‚úÖ backtesting_{model_name}_bankroll.png")
    print(f"  ‚úÖ backtesting_{model_name}_monthly_pnl.png")
print(f"  ‚úÖ backtesting_model_comparison.png")
print("\n" + "=" * 100)

In [None]:
# Close connections
conn.close()
feature_gen.close()
markov_model.close()
print("\n‚úÖ Database connections closed")