# Ensemble Model - Implementation

## Features

- **EnsembleModel**: Combine multiple base models with weighted averaging
- **StackedEnsemble**: Meta-learning with stacking regressor
- **EnsembleGoalPredictor**: Dual ensemble for home/away goals
- Weight optimization (equal, inverse-error, scipy-optimized)
- Multiple combination methods (mean, median, min, max)

## Ensemble Strategies

| Strategy | Description | Best For |
|----------|-------------|----------|
| Simple Averaging | Equal weights | Diverse models |
| Weighted Averaging | Custom weights | Known model quality |
| Inverse-Error | Weight by validation error | Auto-tuning |
| Stacking | Meta-learner combines | Maximum performance |

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor
from sklearn.linear_model import Ridge, ElasticNet, Lasso
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.base import clone
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Notebook-wide configuration: silence every warning, use seaborn's dark grid
# theme, and make 12x6 inches the default figure size.
warnings.filterwarnings('ignore')
sns.set_style('darkgrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

print("Ensemble methods ready!")

## Generate Sample Data

In [None]:
def generate_hockey_data(n_games=1000, seed=42):
    """Generate a synthetic hockey game dataset.

    Each row is one game: sampled team-strength features for the home and
    away side, a derived Elo difference, and Poisson-sampled goal counts.

    Args:
        n_games: Number of games (rows) to generate.
        seed: Seed handed to ``np.random.seed`` before sampling so the output
            is reproducible. The default (42) reproduces the dataset this
            function has always returned. Pass ``None`` to skip seeding and
            leave NumPy's global random state untouched.

    Returns:
        pandas.DataFrame with the ten sampled feature columns, ``elo_diff``,
        and the targets ``home_goals`` and ``away_goals``.
    """
    # NOTE: seeding resets NumPy's *global* RNG, which also affects any other
    # code drawing from np.random after this call.
    if seed is not None:
        np.random.seed(seed)

    # The draw order below determines the generated values for a given seed;
    # do not reorder the entries.
    data = {
        'home_elo': np.random.normal(1500, 100, n_games),
        'away_elo': np.random.normal(1500, 100, n_games),
        'home_goals_avg': np.random.uniform(2.5, 3.5, n_games),
        'away_goals_avg': np.random.uniform(2.5, 3.5, n_games),
        'home_goals_against_avg': np.random.uniform(2.5, 3.5, n_games),
        'away_goals_against_avg': np.random.uniform(2.5, 3.5, n_games),
        'home_pp_pct': np.random.uniform(0.15, 0.30, n_games),
        'away_pp_pct': np.random.uniform(0.15, 0.30, n_games),
        'home_pk_pct': np.random.uniform(0.75, 0.90, n_games),
        'away_pk_pct': np.random.uniform(0.75, 0.90, n_games),
    }

    df = pd.DataFrame(data)
    df['elo_diff'] = df['home_elo'] - df['away_elo']

    # Expected goals: a 3.0 baseline shifted by the Elo difference and by the
    # team's own scoring average relative to 3.0.
    home_base = 3.0 + 0.001 * df['elo_diff'] + 0.3 * (df['home_goals_avg'] - 3.0)
    away_base = 3.0 - 0.001 * df['elo_diff'] + 0.3 * (df['away_goals_avg'] - 3.0)

    # The Poisson rate is floored at 1.5 so it can never become non-positive.
    df['home_goals'] = np.random.poisson(np.maximum(home_base, 1.5))
    df['away_goals'] = np.random.poisson(np.maximum(away_base, 1.5))

    return df

games_df = generate_hockey_data(n_games=1000)
print(f"Generated {len(games_df)} games")

# Every column except the two goal targets is a feature; this cell models
# home goals only.
target_cols = ('home_goals', 'away_goals')
feature_cols = [col for col in games_df.columns if col not in target_cols]
X, y = games_df[feature_cols], games_df['home_goals']

# Two successive 80/20 splits: first carve off the test set, then split the
# remainder into train and validation.
split_kwargs = dict(test_size=0.2, random_state=42)
X_trainval, X_test, y_trainval, y_test = train_test_split(X, y, **split_kwargs)
X_train, X_val, y_train, y_val = train_test_split(X_trainval, y_trainval, **split_kwargs)

print(f"Train: {len(X_train)}, Val: {len(X_val)}, Test: {len(X_test)}")

## Train Individual Base Models

In [None]:
def _rmse(actual, predicted):
    """Root mean squared error between targets and predictions."""
    return np.sqrt(mean_squared_error(actual, predicted))


# Unfitted base-model templates: two linear models and two tree ensembles.
base_models = {
    'ridge': Ridge(alpha=1.0),
    'elastic': ElasticNet(alpha=0.1, l1_ratio=0.5),
    'rf': RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42, n_jobs=-1),
    'gbm': GradientBoostingRegressor(n_estimators=100, max_depth=4, learning_rate=0.1, random_state=42),
}

# Per-model results, all keyed by model name.
fitted_models, model_predictions, model_val_rmse = {}, {}, {}

for name, model in base_models.items():
    # Fit a clone so the template in base_models stays unfitted.
    fitted = clone(model)
    fitted.fit(X_train, y_train)
    fitted_models[name] = fitted

    # Test-set predictions are stored; validation predictions are only scored.
    test_pred = fitted.predict(X_test)
    model_predictions[name] = test_pred

    val_rmse = _rmse(y_val, fitted.predict(X_val))
    model_val_rmse[name] = val_rmse

    test_rmse = _rmse(y_test, test_pred)
    print(f"{name:10s} - Val RMSE: {val_rmse:.4f}, Test RMSE: {test_rmse:.4f}")

## Simple Averaging Ensemble

In [None]:
class SimpleEnsemble:
    """Unweighted ensemble: the arithmetic mean of its members' predictions."""

    def __init__(self, models):
        # Mapping of model name -> already-fitted estimator exposing predict().
        self.models = models

    def predict(self, X):
        """Return, for each row of X, the mean of all member predictions."""
        per_model = [estimator.predict(X) for estimator in self.models.values()]
        stacked = np.column_stack(per_model)  # one column per member model
        return np.mean(stacked, axis=1)

    def evaluate(self, X, y):
        """Score the ensemble on (X, y) and return rmse, mae and r2 in a dict."""
        y_hat = self.predict(X)
        mse = mean_squared_error(y, y_hat)
        return {
            'rmse': np.sqrt(mse),
            'mae': mean_absolute_error(y, y_hat),
            'r2': r2_score(y, y_hat),
        }

# Wrap the fitted base models in an equal-weight ensemble and score it on the
# test split.
simple_ensemble = SimpleEnsemble(fitted_models)
simple_metrics = simple_ensemble.evaluate(X_test, y_test)

print("\nSimple Averaging Ensemble:")
for metric_name, metric_value in simple_metrics.items():
    print(f"  {metric_name}: {metric_value:.4f}")

## Weighted Averaging Ensemble

In [None]:
class WeightedEnsemble:
    """Ensemble whose prediction is a weighted sum of its members' predictions."""

    def __init__(self, models, weights=None):
        # models:  name -> fitted estimator exposing predict().
        # weights: name -> weight; when omitted every model gets 1 / len(models).
        if weights is None:
            count = len(models)
            weights = {name: 1 / count for name in models}
        self.models = models
        self.weights = weights

    def predict(self, X):
        """Return the sum over models of weight * prediction for each row of X."""
        blended = np.zeros(len(X))
        for name in self.models:
            blended += self.weights[name] * self.models[name].predict(X)
        return blended

    def evaluate(self, X, y):
        """Score the ensemble on (X, y) and return rmse, mae and r2 in a dict."""
        y_hat = self.predict(X)
        mse = mean_squared_error(y, y_hat)
        return {
            'rmse': np.sqrt(mse),
            'mae': mean_absolute_error(y, y_hat),
            'r2': r2_score(y, y_hat),
        }

# Inverse-error weighting: each model's weight is proportional to
# 1 / (its validation RMSE), normalized so the weights sum to 1.
inverse_errors = {model_name: 1/val_error for model_name, val_error in model_val_rmse.items()}
total = sum(inverse_errors.values())
weights = {model_name: inv/total for model_name, inv in inverse_errors.items()}

print("Inverse-Error Weights:")
for model_name, weight in weights.items():
    print(f"  {model_name}: {weight:.4f}")

# Blend the fitted models with those weights and score on the test split.
weighted_ensemble = WeightedEnsemble(fitted_models, weights)
weighted_metrics = weighted_ensemble.evaluate(X_test, y_test)

print("\nWeighted Ensemble Metrics:")
for metric_name, metric_value in weighted_metrics.items():
    print(f"  {metric_name}: {metric_value:.4f}")

## Optimized Weights with Scipy

In [None]:
from scipy.optimize import minimize

# Validation predictions from all models: one column per model, in the
# iteration order of fitted_models.
model_names = list(fitted_models)
val_preds = np.column_stack([fitted_models[model_name].predict(X_val) for model_name in model_names])

def objective(weights):
    """Return the validation MSE of the weighted combination val_preds @ weights."""
    combined = val_preds @ weights
    return mean_squared_error(y_val, combined)

# Constraints: weights sum to 1 and each weight lies in [0, 1].
n_models = len(model_names)
constraints = {'type': 'eq', 'fun': lambda w: np.sum(w) - 1}
bounds = [(0, 1) for _ in range(n_models)]
initial = np.ones(n_models) / n_models  # start from equal weights

# Optimize
result = minimize(objective, initial, method='SLSQP', bounds=bounds, constraints=constraints)

if result.success:
    # The optimizer meets the bounds and the sum constraint only up to a
    # tolerance; clip and renormalize so the weights are an exact convex
    # combination.
    best_weights = np.clip(result.x, 0.0, 1.0)
    best_weights = best_weights / best_weights.sum()
else:
    # Do not silently use the output of a failed optimization.
    print(f"Weight optimization failed ({result.message}); falling back to equal weights.")
    best_weights = initial

optimized_weights = dict(zip(model_names, best_weights))

print("Optimized Weights:")
for name, w in optimized_weights.items():
    print(f"  {name}: {w:.4f}")

# Evaluate optimized ensemble on the test split (weights were tuned on validation only).
opt_ensemble = WeightedEnsemble(fitted_models, optimized_weights)
opt_metrics = opt_ensemble.evaluate(X_test, y_test)

print("\nOptimized Ensemble Metrics:")
for k, v in opt_metrics.items():
    print(f"  {k}: {v:.4f}")

## Stacking Ensemble (Meta-Learning)

In [None]:
# Base learners for the stack, as (name, unfitted estimator) pairs.
estimators = [
    ('ridge', Ridge(alpha=1.0)),
    ('elastic', ElasticNet(alpha=0.1, l1_ratio=0.5)),
    ('rf', RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42, n_jobs=-1)),
    ('gbm', GradientBoostingRegressor(n_estimators=100, max_depth=4, learning_rate=0.1, random_state=42)),
]

# A Ridge meta-learner combines the base learners. passthrough=False means it
# is fed only the base-model predictions, not the original features.
stacking = StackingRegressor(
    estimators=estimators,
    final_estimator=Ridge(alpha=0.1),
    cv=5,
    passthrough=False,
    n_jobs=-1,
)

# Stacking runs its own cross-validation, so the separate validation split is
# folded back into the training data.
X_train_full, y_train_full = pd.concat([X_train, X_val]), pd.concat([y_train, y_val])

print("Fitting stacking ensemble (this may take a moment)...")
stacking.fit(X_train_full, y_train_full)

# Score on the untouched test split.
stack_pred = stacking.predict(X_test)
stack_mse = mean_squared_error(y_test, stack_pred)
stack_rmse = np.sqrt(stack_mse)
stack_mae = mean_absolute_error(y_test, stack_pred)
stack_r2 = r2_score(y_test, stack_pred)

print("\nStacking Ensemble Metrics:")
print(f"  RMSE: {stack_rmse:.4f}")
print(f"  MAE:  {stack_mae:.4f}")
print(f"  R²:   {stack_r2:.4f}")

In [None]:
# Show how much weight the fitted meta-learner assigns to each base model.
meta_model = stacking.final_estimator_
if hasattr(meta_model, 'coef_'):
    estimator_names = [label for label, _ in estimators]
    meta_weights = pd.Series(meta_model.coef_, index=estimator_names)
    print("Meta-Learner (Ridge) Weights:")
    print(meta_weights)

    # Bar chart of the coefficients, with a dashed zero line for reference.
    plt.figure(figsize=(8, 4))
    ax = meta_weights.plot(kind='bar', color='steelblue')
    ax.set_title('Stacking Meta-Learner Weights')
    ax.set_ylabel('Weight')
    plt.xticks(rotation=45)
    ax.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
    plt.tight_layout()
    plt.show()

## Compare All Ensemble Methods

In [None]:
# Collect test RMSE for every method, grouped so the bar colours below can be
# derived from the group sizes instead of hard-coded counts.

# Single models: reuse the test predictions stored while training the base
# models, so every base model (including ElasticNet) appears in the chart.
single_labels = {'ridge': 'Ridge', 'elastic': 'ElasticNet', 'rf': 'RF', 'gbm': 'GBM'}
single_results = {
    f"{single_labels.get(name, name)} (single)": np.sqrt(mean_squared_error(y_test, preds))
    for name, preds in model_predictions.items()
}
averaging_results = {
    'Simple Average': simple_metrics['rmse'],
    'Inverse-Error Weighted': weighted_metrics['rmse'],
    'Optimized Weights': opt_metrics['rmse'],
}
stacking_results = {'Stacking': stack_rmse}

comparison = {**single_results, **averaging_results, **stacking_results}

# Plot comparison: gray = single models, blue = averaging ensembles,
# green = stacking.
plt.figure(figsize=(12, 6))
methods = list(comparison.keys())
rmses = list(comparison.values())
colors = (
    ['lightgray'] * len(single_results)
    + ['steelblue'] * len(averaging_results)
    + ['forestgreen'] * len(stacking_results)
)

bars = plt.barh(methods, rmses, color=colors)
plt.xlabel('Test RMSE (lower is better)')
plt.title('Ensemble Method Comparison')

# Add value labels just to the right of each bar.
for bar, rmse in zip(bars, rmses):
    plt.text(rmse + 0.01, bar.get_y() + bar.get_height()/2,
             f'{rmse:.4f}', va='center')

plt.tight_layout()
plt.show()

# Text summary, best (lowest RMSE) first.
print("\nResults Summary:")
for method, rmse in sorted(comparison.items(), key=lambda x: x[1]):
    print(f"  {method:25s}: {rmse:.4f}")

## EnsembleGoalPredictor (Dual Model)

In [None]:
class EnsembleGoalPredictor:
    """Dual weighted ensemble that predicts home goals and away goals.

    Every base model is cloned and fitted once per target (home, away). The
    fitted clones of each target are blended with that target's own weights.
    """

    def __init__(self, base_models, weights='inverse_error'):
        """Store the configuration; nothing is fitted here.

        Args:
            base_models: List of ``(name, estimator)`` tuples. Estimators are
                cloned in ``fit``, so the objects passed in are not modified.
            weights: ``'inverse_error'`` weights each model proportionally to
                1 / (validation RMSE), normalized to sum to 1. Any other
                value gives every model the same weight.
        """
        self.base_models = base_models  # List of (name, model) tuples
        self.weights = weights
        self.home_models = {}
        self.away_models = {}
        self.home_weights = None
        self.away_weights = None
        self.feature_columns = None

    def fit(self, df, feature_columns=None, val_fraction=0.2):
        """Fit the home and away ensembles and compute their weights.

        The last ``int(len(df) * val_fraction)`` rows of ``df`` are held out
        as a validation set; models are fitted on the rows before them.

        Args:
            df: DataFrame containing the feature columns plus ``home_goals``
                and ``away_goals``.
            feature_columns: Columns to use as features. When ``None`` or
                empty, every numeric column except the two targets is used.
            val_fraction: Fraction of rows (taken from the end of ``df``)
                reserved for validation.

        Returns:
            self, to allow call chaining.

        Raises:
            ValueError: If the split leaves no training rows, or if
                ``weights == 'inverse_error'`` and there are no validation
                rows to measure the errors on.
        """
        n_val = int(len(df) * val_fraction)
        if n_val < 0 or n_val >= len(df):
            raise ValueError(
                f"val_fraction={val_fraction!r} leaves no training rows "
                f"({n_val} validation rows out of {len(df)})"
            )
        use_inverse_error = self.weights == 'inverse_error'
        if use_inverse_error and n_val == 0:
            raise ValueError(
                "weights='inverse_error' needs at least one validation row; "
                "increase val_fraction or provide more data"
            )

        # Explicit length check instead of truthiness so array-likes (e.g. a
        # pandas Index) are accepted as well as lists.
        if feature_columns is not None and len(feature_columns) > 0:
            self.feature_columns = feature_columns
        else:
            targets = ('home_goals', 'away_goals')
            numeric_columns = df.select_dtypes(include=[np.number]).columns
            self.feature_columns = [c for c in numeric_columns if c not in targets]

        X = df[self.feature_columns]
        y_home = df['home_goals']
        y_away = df['away_goals']

        # Split at an explicit index. Negative slicing (iloc[:-n_val]) would
        # yield an EMPTY training set when n_val == 0.
        split = len(df) - n_val
        X_train, X_val = X.iloc[:split], X.iloc[split:]

        # Fresh dicts on every fit so models from a previous fit never leak in.
        self.home_models, home_errors = self._fit_target(
            X_train, y_home.iloc[:split], X_val, y_home.iloc[split:])
        self.away_models, away_errors = self._fit_target(
            X_train, y_away.iloc[:split], X_val, y_away.iloc[split:])

        if use_inverse_error:
            self.home_weights = self._inverse_error_weights(home_errors)
            self.away_weights = self._inverse_error_weights(away_errors)
        else:
            # Equal weights. The loop variable is deliberately NOT called `n`,
            # so the model count is not shadowed by the model name.
            n_models = len(self.base_models)
            self.home_weights = {name: 1 / n_models for name, _ in self.base_models}
            self.away_weights = {name: 1 / n_models for name, _ in self.base_models}

        return self

    def _fit_target(self, X_train, y_train, X_val, y_val):
        """Clone and fit every base model on one target.

        Returns a ``(models, errors)`` pair of dicts keyed by model name;
        ``errors`` holds validation RMSEs and is empty when there are no
        validation rows.
        """
        models, errors = {}, {}
        for name, model in self.base_models:
            fitted = clone(model).fit(X_train, y_train)
            models[name] = fitted
            if len(X_val) > 0:
                val_pred = fitted.predict(X_val)
                errors[name] = np.sqrt(mean_squared_error(y_val, val_pred))
        return models, errors

    @staticmethod
    def _inverse_error_weights(errors):
        """Turn per-model errors into weights proportional to 1 / error."""
        # The tiny floor avoids a division by zero for a model with zero
        # validation error; it leaves any realistic error unchanged.
        inverse = {name: 1.0 / max(err, 1e-12) for name, err in errors.items()}
        total = sum(inverse.values())
        return {name: value / total for name, value in inverse.items()}

    @staticmethod
    def _blend(models, weights, X):
        """Return the weighted sum of every model's prediction for X."""
        blended = np.zeros(len(X))
        for name, model in models.items():
            blended += weights[name] * model.predict(X)
        return blended

    def predict_goals(self, df):
        """Predict goals for every row of ``df``.

        Returns:
            Tuple ``(home_pred, away_pred)`` of 1-D arrays, one value per row.
        """
        X = df[self.feature_columns]
        home_pred = self._blend(self.home_models, self.home_weights, X)
        away_pred = self._blend(self.away_models, self.away_weights, X)
        return home_pred, away_pred

    def evaluate(self, df):
        """Score predictions against ``df['home_goals']`` / ``df['away_goals']``.

        Returns:
            Dict with ``home_rmse``, ``away_rmse``, ``home_mae`` and
            ``away_mae``.
        """
        home_pred, away_pred = self.predict_goals(df)
        return {
            'home_rmse': np.sqrt(mean_squared_error(df['home_goals'], home_pred)),
            'away_rmse': np.sqrt(mean_squared_error(df['away_goals'], away_pred)),
            'home_mae': mean_absolute_error(df['home_goals'], home_pred),
            'away_mae': mean_absolute_error(df['away_goals'], away_pred),
        }

In [None]:
# Train dual ensemble predictor
# Stored under its own name so the `base_models` dict used by the
# individual-model training cell is not overwritten with a list (which would
# break re-running that cell).
goal_base_models = [
    ('ridge', Ridge(alpha=1.0)),
    ('rf', RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42)),
    ('gbm', GradientBoostingRegressor(n_estimators=100, max_depth=4, random_state=42)),
]

# Hold out 20% of the games that the predictor never sees during fit, so the
# metrics below are out-of-sample rather than scored on the training rows.
goal_train_df, goal_test_df = train_test_split(games_df, test_size=0.2, random_state=42)

predictor = EnsembleGoalPredictor(goal_base_models)
predictor.fit(goal_train_df, feature_cols)

print("\nHome Model Weights:")
for name, w in predictor.home_weights.items():
    print(f"  {name}: {w:.4f}")

print("\nAway Model Weights:")
for name, w in predictor.away_weights.items():
    print(f"  {name}: {w:.4f}")

# Evaluate on the held-out games only.
metrics = predictor.evaluate(goal_test_df)
print("\nDual Ensemble Metrics:")
for k, v in metrics.items():
    print(f"  {k}: {v:.4f}")

## Summary

### Key Takeaways

1. **Simple Averaging** works well when models have similar accuracy
2. **Inverse-Error Weighting** automatically favors better models
3. **Optimized Weights** find the best convex combination (non-negative weights that sum to 1) of the base models on the validation set
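4. **Stacking** learns how to combine the base models with a meta-model (here a Ridge meta-learner, so the combination is linear; a non-linear meta-learner would be needed for non-linear combinations)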

### Best Practices

- Use diverse models (linear + tree-based) for best ensembles
- Always use validation set for weight optimization (not test set!)
- Stacking with CV trains the meta-learner on out-of-fold predictions, which reduces (but does not eliminate) overfitting to the training data
- More models ≠ better ensemble (diminishing returns)