# Enhanced Euro 2024 Prediction Model Training

This notebook demonstrates the enhanced training pipeline for Euro 2024 match predictions.

## Features
- Advanced feature engineering with attack/defense strength metrics
- Multiple algorithm comparison with hyperparameter tuning
- Comprehensive model evaluation
- Feature importance analysis
- Model versioning and persistence

In [None]:
# Import the enhanced training system
# The project modules are not installed as a package; the relative path below is
# added to sys.path so the two imports that follow can be resolved.
# NOTE(review): the path is relative to the current working directory, so this
# presumably expects the notebook to be run from its own folder - confirm.
import sys
sys.path.append('../scripts')

# Project-local classes: one drives training, the other produces predictions.
from train_enhanced_model import EuroFootballPredictor
from predict_euro2024 import Euro2024Predictor

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Global plotting defaults applied to every figure created later in the notebook.
# NOTE(review): the 'seaborn-v0_8' style name must be available in the installed
# Matplotlib version - confirm against the environment.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

## 1. Train Enhanced Models

Run the complete training pipeline with enhanced features and model selection.

In [None]:
# Initialize the predictor
# data_path is a relative path, resolved against the current working directory.
trainer = EuroFootballPredictor(data_path='../data/')

# Run complete training pipeline
# The returned object is indexed by 'result_accuracy', 'goals_r2' and
# 'goals_rmse' in the results cell that follows.
training_results = trainer.run_complete_training()

## 2. Analyze Training Results

In [None]:
# Report the headline metrics of the training run, one line per metric.
# Each entry pairs the label to print with the key looked up in training_results.
summary_rows = (
    ("Result Model Accuracy", 'result_accuracy'),
    ("Goals Model R²", 'goals_r2'),
    ("Goals Model RMSE", 'goals_rmse'),
)

print("Training Results Summary:")
for metric_label, metric_key in summary_rows:
    # Every metric is shown with three decimals.
    print(f"{metric_label}: {training_results[metric_key]:.3f}")

## 3. Make Euro 2024 Predictions

Use the trained models to predict Euro 2024 group stage matches.

In [None]:
# Initialize predictor with enhanced models
predictor = Euro2024Predictor(model_version='v2_enhanced')

# Run predictions
# NOTE(review): save_results=True presumably writes the CSV that the analysis
# cell reads back from '../data/euro2024_predictions_v2_enhanced.csv' - confirm
# the output path inside Euro2024Predictor.
predictions = predictor.run_prediction(save_results=True)

## 4. Analyze Predictions

In [None]:
# Load the predictions for analysis.
# 'predicted_result' holds the codes '1' (home win), 'X' (draw) and '2' (away win).
# The column is forced to string: when a run predicts no draws the file contains
# only 1 and 2, pandas would infer an integer dtype, and every comparison against
# '1' / '2' would then be False and silently report zero wins.
predictions_df = pd.read_csv(
    '../data/euro2024_predictions_v2_enhanced.csv',
    dtype={'predicted_result': str},
)

# Basic statistics
# Count each result code once instead of filtering the frame three times.
result_counts = predictions_df['predicted_result'].value_counts()
predicted_goals = predictions_df['predicted_goals']

print("Prediction Statistics:")
print(f"Total matches: {len(predictions_df)}")
# .get(..., 0) covers result codes that never occur in this set of predictions.
print(f"Home wins predicted: {result_counts.get('1', 0)}")
print(f"Draws predicted: {result_counts.get('X', 0)}")
print(f"Away wins predicted: {result_counts.get('2', 0)}")
print(f"Average goals per match: {predicted_goals.mean():.2f}")
print(f"Matches with >2.5 goals: {int((predicted_goals > 2.5).sum())}")

In [None]:
# Visualize goal predictions
# Two side-by-side histograms: predicted goals (left) and confidence (right).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# One entry per panel:
# (axis, column, reference value, reference label, x-axis label, title)
panels = [
    (ax1, 'predicted_goals', 2.5, 'Over/Under 2.5',
     'Predicted Goals', 'Distribution of Predicted Goals'),
    (ax2, 'confidence', 0.6, 'High Confidence (0.6)',
     'Prediction Confidence', 'Distribution of Prediction Confidence'),
]

for axis, column, reference, reference_label, x_label, title in panels:
    values = predictions_df[column]
    average = values.mean()

    axis.hist(values, bins=15, alpha=0.7, edgecolor='black')
    # Red dashed line marks the mean, orange dashed line the reference value.
    axis.axvline(average, color='red', linestyle='--', label=f'Mean: {average:.2f}')
    axis.axvline(reference, color='orange', linestyle='--', label=reference_label)
    axis.set_xlabel(x_label)
    axis.set_ylabel('Number of Matches')
    axis.set_title(title)
    axis.legend()
    axis.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Show high confidence predictions
# Readable labels for the result codes; built once rather than on every iteration.
result_map = {'1': 'Home Win', '2': 'Away Win', 'X': 'Draw'}

# Keep matches with confidence of at least 0.6, most confident first.
high_conf = predictions_df[predictions_df['confidence'] >= 0.6].copy()
high_conf = high_conf.sort_values('confidence', ascending=False)

print(f"\n🎯 HIGH CONFIDENCE PREDICTIONS ({len(high_conf)} matches):")
print("=" * 70)

for _, match in high_conf.iterrows():
    # Normalise to str: after a CSV round-trip the code may arrive as an integer.
    # Unknown codes are printed as-is instead of aborting the listing with KeyError.
    result_code = str(match['predicted_result'])
    result_text = result_map.get(result_code, result_code)

    print(f"{match['home_team']} vs {match['away_team']}")
    print(f"  📅 {match['date']} in {match['city']}")
    print(f"  🎯 Prediction: {result_text} (Confidence: {match['confidence']:.1%})")
    print(f"  ⚽ Expected Goals: {match['predicted_goals']:.1f}")
    print()

## 5. Compare with Original Models

Let's compare predictions from the enhanced model vs the original model.

In [None]:
# Make predictions with original model
# save_results=False is passed for this run; the enhanced predictions loaded
# earlier are reused for the comparison.
original_predictor = Euro2024Predictor(model_version='original')
original_predictions = original_predictor.run_prediction(save_results=False)

# Compare key metrics
# Each model contributes one row; the same three summaries are computed per frame.
model_frames = [
    ('Original', original_predictions),
    ('Enhanced', predictions_df),
]

comparison = pd.DataFrame({
    'Model': [label for label, _ in model_frames],
    'Avg_Goals': [frame['predicted_goals'].mean() for _, frame in model_frames],
    # Number of matches with confidence of at least 0.6.
    'High_Confidence_Predictions': [
        len(frame[frame['confidence'] >= 0.6]) for _, frame in model_frames
    ],
    # Number of matches predicted to exceed 2.5 goals.
    'Over_2.5_Goals': [
        len(frame[frame['predicted_goals'] > 2.5]) for _, frame in model_frames
    ],
})

print("Model Comparison:")
print(comparison.to_string(index=False))

## 6. Betting Strategy Analysis

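Identify betting opportunities based on model confidence: every match whose predicted result has a confidence of at least 60% yields a result bet, plus an over/under 2.5 goals bet when the predicted total is clearly above (> 2.7) or below (< 2.2) that line. The confidence shown is the result model's confidence; it is not compared against bookmaker odds.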

In [None]:
# Define betting strategies
def analyze_betting_opportunities(df, min_confidence=0.6):
    """
    Analyze betting opportunities based on prediction confidence.

    Every match whose ``confidence`` is at least ``min_confidence`` yields one
    result bet (home win, away win or draw). The same match additionally yields
    an over/under 2.5 goals bet when ``predicted_goals`` is above 2.7 or below
    2.2; totals between those margins produce no goals bet.

    Args:
        df: DataFrame with the columns ``home_team``, ``away_team``, ``date``,
            ``predicted_result`` ('1' home win, '2' away win, anything else is
            treated as a draw), ``confidence`` and ``predicted_goals``.
        min_confidence: Minimum confidence a match needs to be considered.

    Returns:
        DataFrame with the columns ``match``, ``date``, ``bet_type``,
        ``confidence`` and ``predicted_goals``, one row per suggested bet.
        The columns are present even when no match qualifies, so callers can
        sort or filter an empty result without a KeyError.
    """
    columns = ['match', 'date', 'bet_type', 'confidence', 'predicted_goals']
    opportunities = []

    for _, match in df.iterrows():
        # Written as "not >=" so a NaN confidence is skipped, as a plain
        # ">=" test would skip it.
        if not match['confidence'] >= min_confidence:
            continue

        fixture = f"{match['home_team']} vs {match['away_team']}"

        # Result betting.
        # Normalise to str: after a CSV round-trip the code may arrive as an
        # integer, which would otherwise fall through and be labelled "Draw".
        result_code = str(match['predicted_result']).strip()
        if result_code == '1':
            result_bet = f"{match['home_team']} to win"
        elif result_code == '2':
            result_bet = f"{match['away_team']} to win"
        else:
            result_bet = "Draw"
        bet_types = [result_bet]

        # Goals betting: only when the predicted total is clearly away from
        # the 2.5 line. Note the goals bet carries the match's result
        # confidence, not a separate goals confidence.
        if match['predicted_goals'] > 2.7:
            bet_types.append('Over 2.5 Goals')
        elif match['predicted_goals'] < 2.2:
            bet_types.append('Under 2.5 Goals')

        opportunities.extend(
            {
                'match': fixture,
                'date': match['date'],
                'bet_type': bet_type,
                'confidence': match['confidence'],
                'predicted_goals': match['predicted_goals'],
            }
            for bet_type in bet_types
        )

    # Explicit columns keep the schema stable when the list is empty.
    return pd.DataFrame(opportunities, columns=columns)

# Analyze betting opportunities
# Single source of truth for the threshold, so the header below cannot drift
# from the value actually passed to the analysis.
MIN_CONFIDENCE = 0.6
betting_opps = analyze_betting_opportunities(predictions_df, min_confidence=MIN_CONFIDENCE)

print(f"🎰 BETTING OPPORTUNITIES (Confidence ≥ {MIN_CONFIDENCE:.0%}):")
print("=" * 80)
if betting_opps.empty:
    # An empty result may have no 'confidence' column to sort on; report it
    # instead of failing.
    print("No matches meet the confidence threshold.")
else:
    betting_opps = betting_opps.sort_values('confidence', ascending=False)
    print(betting_opps.to_string(index=False))

## Summary

This enhanced training system provides:

1. **Better Features**: Attack/defense strength ratios and differentials
2. **Model Selection**: Automatic selection of best-performing algorithms
3. **Hyperparameter Tuning**: Grid search for optimal parameters
4. **Comprehensive Evaluation**: Multiple metrics and visualizations
5. **Betting Analysis**: Confidence-based opportunity identification

The models are saved with versioning for easy comparison and deployment.