# 📊 Analyse du Backtest NBA 2025-26

Ce notebook analyse les résultats du backtesting réaliste sans data leakage.

**Méthodologie** :
- Simulation jour par jour
- Recalcul des features sans data leakage
- Comparaison avec les résultats réels
- Métriques complètes (accuracy, precision, recall, AUC, Brier)

In [None]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from datetime import datetime

# Configuration des plots
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['font.size'] = 10

%matplotlib inline

## 1. Chargement des données

In [None]:
# Locate the backtest outputs for the 2025-26 season and load the newest one
# into `results`.
predictions_dir = Path('../predictions')
backtest_files = list(predictions_dir.glob('backtest_2025-26_*.json'))

if not backtest_files:
    # NOTE: `results` is left undefined in this branch, so the cells that read
    # it cannot run until a backtest file has been generated.
    print("❌ Aucun fichier de backtest trouvé. Lancez d'abord :")
    print("   python scripts/run_backtest.py")
else:
    # "Most recent" is decided by the file's modification time on disk.
    latest_file = max(backtest_files, key=lambda p: p.stat().st_mtime)
    print(f"📁 Chargement: {latest_file.name}")

    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, which is not UTF-8 everywhere (e.g. legacy Windows code pages).
    with open(latest_file, 'r', encoding='utf-8') as f:
        results = json.load(f)

    print(f"✓ Données chargées: {len(results['predictions'])} prédictions")

## 2. Résumé des performances

In [None]:
# Global metrics: read the 'summary' section of the loaded backtest results
# and print it as a small text report.
summary = results['summary']

separator = "=" * 50

print("📈 Métriques Globales")
print(separator)
print(f"Total matchs analysés: {summary['total_games']}")
print(f"Coverage: {summary['coverage']:.1%}")
print()
# Classification metrics, formatted as percentages.
print(f"Accuracy:  {summary['accuracy']:.2%}")
print(f"Precision: {summary['precision']:.2%}")
print(f"Recall:    {summary['recall']:.2%}")
print(f"F1-Score:  {summary['f1']:.2%}")
print()
# Probabilistic metrics, printed as raw scores with four decimals.
print(f"AUC:         {summary['auc']:.4f}")
print(f"Brier Score: {summary['brier_score']:.4f}")
print(separator)

## 3. Évolution de l'Accuracy dans le temps

In [None]:
# Build one DataFrame row per prediction.
df = pd.DataFrame(results['predictions'])
df['game_date'] = pd.to_datetime(df['game_date'])

# The running accuracy is plotted against the game date, so the rows must be in
# chronological order. Sort explicitly instead of trusting the file order;
# mergesort is stable, so games played on the same day keep their relative order.
df = df.sort_values('game_date', kind='mergesort').reset_index(drop=True)
df['cumulative_accuracy'] = df['is_correct'].expanding().mean()

# Plot the running accuracy with two reference lines: the final accuracy from
# the summary and the 50% coin-flip baseline.
fig, ax = plt.subplots(figsize=(14, 6))

ax.plot(df['game_date'], df['cumulative_accuracy'], linewidth=2, label='Accuracy cumulée')
ax.axhline(y=summary['accuracy'], color='r', linestyle='--',
           label=f"Accuracy finale: {summary['accuracy']:.2%}")
ax.axhline(y=0.5, color='gray', linestyle=':', alpha=0.5, label='Random (50%)')

ax.set_xlabel('Date')
ax.set_ylabel('Accuracy cumulée')
ax.set_title("Évolution de l'Accuracy au fil de la saison")
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 4. Performance par Mois

In [None]:
# Monthly breakdown: one row per month, indexed by a parsed date and ordered
# chronologically.
by_month = pd.DataFrame.from_dict(results['by_month'], orient='index')
by_month.index = pd.to_datetime(by_month.index)
by_month = by_month.sort_index()

# Bar positions and tick labels are shared by both charts, so build them once.
x_positions = range(len(by_month))
month_labels = [d.strftime('%Y-%m') for d in by_month.index]

# Two stacked charts: accuracy per month (top), number of games per month (bottom).
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))

# Top chart: monthly accuracy with the overall accuracy as a dashed reference.
ax1.bar(x_positions, by_month['accuracy'], color='steelblue', alpha=0.7)
ax1.axhline(
    y=summary['accuracy'],
    color='r',
    linestyle='--',
    label=f"Moyenne: {summary['accuracy']:.2%}",
)

# Bottom chart: number of games per month.
ax2.bar(x_positions, by_month['games'], color='lightcoral', alpha=0.7)

# Identical x ticks and horizontal grid on both charts.
for axis in (ax1, ax2):
    axis.set_xticks(x_positions)
    axis.set_xticklabels(month_labels, rotation=45)
    axis.grid(True, alpha=0.3, axis='y')

ax1.set(ylabel='Accuracy', title='Accuracy par Mois')
ax1.legend()

ax2.set(
    ylabel='Nombre de matchs',
    xlabel='Mois',
    title='Volume de matchs par Mois',
)

plt.tight_layout()
plt.show()

## 5. Performance par Niveau de Confiance

In [None]:
# Per-confidence-level statistics: one row per level.
by_conf = pd.DataFrame.from_dict(results['by_confidence'], orient='index')

# Green / orange / red palette.
palette = ['#2ecc71', '#f39c12', '#e74c3c']
# Presumably the levels are HIGH / MEDIUM / LOW, with green meant for HIGH and
# red for LOW -- TODO confirm against the keys written by the backtest.
level_colors = dict(zip(('HIGH', 'MEDIUM', 'LOW'), palette))
level_names = [str(level).upper() for level in by_conf.index]
if level_names and all(name in level_colors for name in level_names):
    # Colour by level name so the result does not depend on JSON key order.
    colors = [level_colors[name] for name in level_names]
else:
    # Unexpected labels: keep the positional colouring.
    colors = palette

# Two side-by-side charts: accuracy per level (bars) and share of games (pie).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Accuracy per level, with the global accuracy as a dashed reference.
ax1.bar(by_conf.index, by_conf['accuracy'], color=colors, alpha=0.8)
ax1.axhline(y=summary['accuracy'], color='black', linestyle='--',
            label=f"Global: {summary['accuracy']:.2%}")
ax1.set_ylabel('Accuracy')
ax1.set_title('Accuracy par Niveau de Confiance')
ax1.legend()
ax1.grid(True, alpha=0.3, axis='y')

# Share of games falling into each confidence level.
ax2.pie(by_conf['games'], labels=by_conf.index, autopct='%1.1f%%',
        colors=colors, startangle=90)
ax2.set_title('Distribution des Prédictions par Confiance')

plt.tight_layout()
plt.show()

# Detailed text table: accuracy and correct/total counts per level.
print("\n📊 Détail par Niveau de Confiance")
print("="*60)
for level, stats in by_conf.iterrows():
    print(f"{level:8s}: {stats['accuracy']:.2%} ({stats['correct']:.0f}/{stats['games']:.0f})")
print("="*60)

## 6. Performance par Équipe (Top 10)

In [None]:
# Per-team statistics, restricted to teams with at least 10 predictions and
# ordered from the highest accuracy to the lowest.
by_team = pd.DataFrame.from_dict(results['by_team'], orient='index')
by_team = by_team[by_team['predicted'] >= 10]  # minimum sample-size filter
by_team = by_team.sort_values('accuracy', ascending=False)

# Keep the ten best rows.
top_10 = by_team.head(10)

fig, ax = plt.subplots(figsize=(12, 8))

y_pos = range(len(top_10))
# Map each accuracy value through the red-yellow-green colormap.
colors = plt.cm.RdYlGn(top_10['accuracy'])

ax.barh(y_pos, top_10['accuracy'], color=colors, alpha=0.8)
ax.set_yticks(y_pos)
ax.set_yticklabels(top_10.index, fontsize=9)
# Put the first (best) row at the top of the chart.
ax.invert_yaxis()
ax.set_xlabel('Accuracy')
ax.set_title('Top 10 Équipes - Accuracy de Prédiction\n(min. 10 prédictions)')
ax.grid(True, alpha=0.3, axis='x')

# Annotate each bar with its accuracy and correct/predicted counts.
for i, (idx, row) in enumerate(top_10.iterrows()):
    ax.text(row['accuracy'] + 0.01, i,
            f"{row['accuracy']:.1%} ({row['correct']:.0f}/{row['predicted']:.0f})",
            va='center', fontsize=8)

plt.tight_layout()
plt.show()

## 7. Matrice de Confusion

In [None]:
from sklearn.metrics import confusion_matrix

# Encode each game as 1 = home win, 0 = anything else.
# NOTE(review): the actual winner is compared with 'HOME' while the predicted
# winner is compared with 'Home Win' -- confirm both literals match what the
# backtest writes, otherwise one side silently collapses to 0.
y_true = [1 if p['actual_winner'] == 'HOME' else 0 for p in results['predictions']]
y_pred = [1 if p['predicted_winner'] == 'Home Win' else 0 for p in results['predictions']]

# Pin the label order so the matrix is always 2x2 (rows/columns = [away, home]),
# even when one class never appears; the 4-way unpacking and the fixed tick
# labels below both rely on that shape.
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

# Heatmap of the raw counts.
fig, ax = plt.subplots(figsize=(8, 6))

sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax,
            xticklabels=['Away Win (Pred)', 'Home Win (Pred)'],
            yticklabels=['Away Win (Real)', 'Home Win (Real)'])

ax.set_ylabel('Résultat Réel')
ax.set_xlabel('Prédiction')
ax.set_title('Matrice de Confusion')

plt.tight_layout()
plt.show()

# Express each cell as a share of all predictions.
tn, fp, fn, tp = cm.ravel()
total = tn + fp + fn + tp

print("\n📊 Matrice de Confusion (%)")
print(f"Vrais Négatifs (Away correct):  {tn/total:.1%}")
print(f"Faux Positifs (Home prédit, Away réel): {fp/total:.1%}")
print(f"Faux Négatifs (Away prédit, Home réel): {fn/total:.1%}")
print(f"Vrais Positifs (Home correct):  {tp/total:.1%}")

## 8. Distribution des Probabilités

In [None]:
# Split the predicted home-win probabilities by whether the prediction was
# correct. The element-wise comparisons are kept on purpose: a row whose
# `is_correct` is neither True nor False ends up in neither group.
correct = df.loc[df['is_correct'] == True, 'proba_home_win']
incorrect = df.loc[df['is_correct'] == False, 'proba_home_win']

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Left: overlaid density histograms with the 50% decision threshold.
ax1.hist(correct, bins=30, alpha=0.6, label='Correct', color='green', density=True)
ax1.hist(incorrect, bins=30, alpha=0.6, label='Incorrect', color='red', density=True)
ax1.axvline(x=0.5, color='black', linestyle='--', label='Seuil (50%)')
ax1.set_xlabel('Probabilité Home Win')
ax1.set_ylabel('Densité')
ax1.set_title('Distribution des Probabilités')
ax1.legend()

# Right: box plot of the same two groups.
data_to_plot = [correct, incorrect]
# The tick labels are set on the axis rather than through boxplot's `labels`
# argument, which Matplotlib 3.9 deprecated in favour of `tick_labels`; this
# form works on both older and newer releases.
bp = ax2.boxplot(data_to_plot, patch_artist=True)
ax2.set_xticklabels(['Correct', 'Incorrect'])
bp['boxes'][0].set_facecolor('green')
bp['boxes'][0].set_alpha(0.6)
bp['boxes'][1].set_facecolor('red')
bp['boxes'][1].set_alpha(0.6)
ax2.axhline(y=0.5, color='black', linestyle='--', alpha=0.5)
ax2.set_ylabel('Probabilité Home Win')
ax2.set_title('Distribution par Résultat')
ax2.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

# Mean and standard deviation of the probabilities in each group.
print("\n📊 Statistiques des Probabilités")
print(f"Correct - Moyenne: {correct.mean():.3f}, Écart-type: {correct.std():.3f}")
print(f"Incorrect - Moyenne: {incorrect.mean():.3f}, Écart-type: {incorrect.std():.3f}")

## 9. Calibration des Probabilités

In [None]:
from sklearn.calibration import calibration_curve

# Inputs for the calibration curve: the binary outcome (1 when the actual
# winner is 'HOME') and the predicted home-win probability of each game.
y_true = [1 if p['actual_winner'] == 'HOME' else 0 for p in results['predictions']]
y_proba = [p['proba_home_win'] for p in results['predictions']]

prob_true, prob_pred = calibration_curve(y_true, y_proba, n_bins=10)

fig, ax = plt.subplots(figsize=(8, 8))

# Model curve against the diagonal, which represents perfect calibration.
ax.plot(prob_pred, prob_true, 's-', label='Modèle', markersize=8)
ax.plot([0, 1], [0, 1], 'k--', label='Parfaitement calibré')
ax.set_xlabel('Probabilité prédite')
ax.set_ylabel('Fraction de positifs')
ax.set_title('Courbe de Calibration')
ax.legend(loc='lower right')
ax.grid(True, alpha=0.3)
# Show the full [0, 1] range on both axes so the diagonal spans the plot.
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])

plt.tight_layout()
plt.show()

print(f"Brier Score: {summary['brier_score']:.4f}")
print("(Plus proche de 0 = mieux calibré)")

## 10. Export des Résultats

Les résultats sont déjà sauvegardés dans les fichiers :
- JSON complet avec toutes les métriques
- CSV avec les prédictions détaillées
- Résumé textuel

Vous pouvez les retrouver dans : `../predictions/`

In [None]:
# List the three most recently modified backtest files with size and timestamp.
predictions_dir = Path('../predictions')
# Order by modification time (the same "most recent" criterion used when
# loading the results) rather than by filename, so that "last three" really
# means the three newest files even when several seasons are present.
backtest_files = sorted(
    predictions_dir.glob('backtest_*.json'),
    key=lambda p: p.stat().st_mtime,
)[-3:]

print("📁 Fichiers de backtest disponibles :")
for f in backtest_files:
    # One stat() call per file provides both the size and the timestamp.
    file_stat = f.stat()
    size_mb = file_stat.st_size / (1024 * 1024)
    mtime = datetime.fromtimestamp(file_stat.st_mtime)
    print(f"  {f.name:<40} {size_mb:>6.2f} MB  {mtime.strftime('%Y-%m-%d %H:%M')}")

---

## 📝 Conclusion

Ce backtest réaliste (sans data leakage) donne une estimation fiable des performances du modèle en production.

**Interprétation des résultats :**
- Accuracy > 70% : Bon modèle
- Accuracy > 75% : Excellent modèle
- Brier Score < 0.2 : Bonne calibration
- High Confidence > 80% : Très fiable quand confiant

**Recommandations :**
- Se fier principalement aux prédictions HIGH confidence
- Éviter de parier sur LOW confidence
- Surveiller les équipes avec faible accuracy