In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
sys.path.insert(0, '../src')

from utils import (plot_title_race, plot_feature_importance,
                   generate_report, calculate_elo_ratings)

# Notebook-wide plot styling: apply matplotlib's 'seaborn-v0_8-whitegrid'
# style sheet, then set seaborn's default colour palette to "husl".
# NOTE(review): the order is kept as written because both calls modify global
# plotting state — confirm they are independent before reordering.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("husl")

In [None]:
# Read the latest predictions CSV (path is relative to the notebook's working
# directory) and print how many rows were loaded.
# NOTE(review): the message assumes one row per team — confirm against the data.
predictions = pd.read_csv('../data/predictions/predictions_latest.csv')
print(f"Loaded predictions for {len(predictions)} teams")
# Last expression of the cell, so the first ten rows are shown as rich output.
predictions.head(10)

In [None]:
# Hand the predictions table and a PNG output path to the project helper
# imported from utils.
# NOTE(review): presumably this draws the title-race chart and writes it to
# save_path — the helper's behaviour is defined in ../src/utils, not here.
plot_title_race(predictions, save_path='../data/predictions/title_race.png')

In [None]:
# One row, three panels: title odds, top-four odds and relegation risk.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
ax1, ax2, ax3 = axes

# Left panel: pie chart of the title probability over the first ten rows.
top10 = predictions.head(10)
ax1.pie(top10['prob_win_league'], labels=top10['team'], autopct='%1.1f%%')
ax1.set_title('Title Win Probability')

# Middle panel: horizontal bars of the top-four probability for the first
# eight rows of the table.
sns.barplot(data=predictions.head(8), x='prob_top_4', y='team', ax=ax2, palette='Blues_r')
ax2.set(xlabel='Top 4 Probability (%)', title='Champions League Qualification')

# Right panel: the six rows with the largest relegation probability.
bottom = predictions.nlargest(6, 'prob_relegation')
sns.barplot(data=bottom, x='prob_relegation', y='team', ax=ax3, palette='Reds')
ax3.set(xlabel='Relegation Probability (%)', title='Relegation Battle')

plt.tight_layout()
plt.show()

In [None]:
# Horizontal bars of projected points for the first ten rows, with whiskers
# showing the 90% interval around each projection.
fig, ax = plt.subplots(figsize=(12, 6))

teams = predictions['team'].values[:10]
points = predictions['predicted_points'].values[:10]

# Each bound is taken from its own column when that column exists; otherwise
# a flat 5-point margin around the projection is used instead.
if 'points_90_ci_low' in predictions:
    ci_low = predictions['points_90_ci_low'].values[:10]
else:
    ci_low = points - 5
if 'points_90_ci_high' in predictions:
    ci_high = predictions['points_90_ci_high'].values[:10]
else:
    ci_high = points + 5

y_pos = np.arange(len(teams))

ax.barh(y_pos, points, alpha=0.7, label='Predicted Points')
# errorbar takes distances from the plotted value, not absolute bounds.
below, above = points - ci_low, ci_high - points
ax.errorbar(points, y_pos, xerr=[below, above], fmt='none', color='black', capsize=3)

ax.set_yticks(y_pos)
ax.set_yticklabels(teams)
ax.set(xlabel='Points', title='Projected Final Points with 90% Confidence Interval')
# Flip the axis so the first row of the table ends up at the top.
ax.invert_yaxis()

plt.tight_layout()
plt.show()

In [None]:
# Load the feature table and plot how its columns correlate with each other.
features = pd.read_csv('../data/processed/features_gw17.csv')
print(f"Features shape: {features.shape}")

# Correlation matrix
# Only numeric/bool columns can be correlated. Since pandas 2.0
# DataFrame.corr() raises on text columns instead of silently skipping them,
# so select the usable columns explicitly rather than excluding 'team' alone.
feature_cols = [
    c for c in features.select_dtypes(include=['number', 'bool']).columns
    if c != 'team'
]
# Make the filtering visible instead of dropping columns without a trace.
skipped_cols = [c for c in features.columns if c != 'team' and c not in feature_cols]
if skipped_cols:
    print(f"Skipping non-numeric columns: {skipped_cols}")

corr = features[feature_cols].corr()

fig, ax = plt.subplots(figsize=(14, 12))
# xticklabels/yticklabels=True forces every feature name to be drawn.
sns.heatmap(corr, cmap='RdBu_r', center=0, ax=ax, xticklabels=True, yticklabels=True)
ax.set_title('Feature Correlation Matrix')
plt.tight_layout()
plt.show()

In [None]:
# Season progress and the average width of the projection interval.
SEASON_GAMEWEEKS = 38  # season length used by the arithmetic below
gameweek = 17
remaining = SEASON_GAMEWEEKS - gameweek
# Share of the season already played, as a percentage.
confidence_score = (gameweek / SEASON_GAMEWEEKS) * 100

print(f"Current Gameweek: {gameweek}")
print(f"Games Remaining: {remaining}")
print(f"Data Completeness: {confidence_score:.1f}%")

# Calculate prediction uncertainty
# Both interval columns are read below, so both must exist; checking only the
# lower bound would raise KeyError when the upper bound is missing.
if {'points_90_ci_low', 'points_90_ci_high'}.issubset(predictions.columns):
    uncertainty = (predictions['points_90_ci_high'] - predictions['points_90_ci_low']).mean()
    # \u00b1 is the plus-minus sign, written as an escape so the source stays
    # immune to the encoding mix-up that previously rendered it as two characters.
    print(f"Average Uncertainty Range: \u00b1{uncertainty/2:.1f} points")

In [None]:
# Put each team's current points next to its projected total.
standings = pd.read_csv('../data/processed/standings_with_projection_gw17.csv')

# Join on team name (default inner join), add the gap between projected and
# current points, then order by projected points, highest first.
comparison = (
    predictions[['team', 'predicted_points']]
    .merge(standings[['team', 'points']], on='team')
    .assign(points_to_gain=lambda df: df['predicted_points'] - df['points'])
    .sort_values('predicted_points', ascending=False)
)

print("Current vs Projected Points:")
print(comparison.head(10))

In [None]:
# Build the report with the project helper from utils and print what it returns.
# The gameweek number and the output file name are both derived from the
# `gameweek` variable set in the earlier season-progress cell, so the two can
# no longer drift apart from it or from each other.
# NOTE(review): presumably generate_report also writes the text to save_path —
# its behaviour is defined in ../src/utils, not here.
report = generate_report(
    predictions,
    gameweek=gameweek,
    save_path=f'../data/predictions/report_gw{gameweek}.txt',
)
print(report)

In [None]:
# To track predictions over time, run this after each gameweek:
#
# 1. Update config.yaml with the new gameweek number
# 2. Run: python main.py --update --gameweek <N>
# 3. Run this notebook to analyze changes
#
# The predictions history will show how the title race evolves.

import glob
import re


def gameweek_from_path(path):
    """Return the gameweek number embedded in a predictions file name.

    Handles both ``predictions_gw17.csv`` and suffixed names such as
    ``predictions_gw17_<anything>.csv``. Only the file-name part of ``path``
    is matched, so directory names cannot interfere.

    Args:
        path: Path (or bare file name) of a predictions CSV.

    Returns:
        The gameweek as an int, or None when the name carries no number.
    """
    match = re.search(r'predictions_gw(\d+)[^/\\]*\.csv$', path)
    return int(match.group(1)) if match else None


prediction_files = glob.glob('../data/predictions/predictions_gw*.csv')
print(f"Historical predictions available: {len(prediction_files)} gameweeks")

if len(prediction_files) > 1:
    # Pair every file with its gameweek so the history can be ordered
    # numerically; plain string sorting would put gw10 before gw2.
    files_by_gameweek = []
    for f in prediction_files:
        gw = gameweek_from_path(f)
        if gw is None:
            print(f"Skipping {f}: no gameweek number in file name")
            continue
        files_by_gameweek.append((gw, f))

    history = []
    for gw, f in sorted(files_by_gameweek):
        df = pd.read_csv(f)
        if df.empty:
            # Nothing to report for an empty file; iloc[0] would raise.
            continue
        # The first row is taken as the predicted winner.
        # NOTE(review): assumes each file is sorted best-first — confirm.
        winner = df.iloc[0]
        history.append({
            'gameweek': gw,
            'winner': winner['team'],
            'points': winner['predicted_points']
        })

    history_df = pd.DataFrame(history)
    print("\nPredicted Winner by Gameweek:")
    print(history_df)