# Model 4: Ensemble Method (Literature - Paper A)

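This notebook trains three regressors (Ridge, Random Forest, and XGBoost) and combines their test-set predictions into a fixed weighted average (XGBoost 0.5, Random Forest 0.3, Ridge 0.2), based on literature recommendations (Paper A).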

## 1. Imports and Setup

In [1]:
import sys
import pickle
import pandas as pd
from pathlib import Path
# Add parent directory to path to import src
sys.path.append(str(Path('..').resolve()))
from src.models import (
LinearRegressionModel, 
RandomForestModel, 
XGBoostModel,
compute_regression_metrics
)
# Purpose: train three base regressors (Ridge, Random Forest, XGBoost), blend
# their test-set predictions with fixed weights, report R²/RMSE, and pickle the
# blended predictions together with the metrics.
print("="*70)
print("MODEL 4: ENSEMBLE METHOD (Literature - Paper A)")
print("="*70)
print("Approach: Weighted ensemble of Ridge, Random Forest, and XGBoost")

# --- Configuration ----------------------------------------------------------
# Blend weights per base model (from paper/experiments). Kept in one place so
# they can be tuned without hunting through the arithmetic below.
ENSEMBLE_WEIGHTS = {'xgb': 0.50, 'rf': 0.30, 'ridge': 0.20}

# A weighted average is only on the same scale as the target when the weights
# sum to 1; fail loudly if someone edits the mapping above inconsistently.
weight_total = sum(ENSEMBLE_WEIGHTS.values())
if abs(weight_total - 1.0) > 1e-9:
    raise ValueError(f"Ensemble weights must sum to 1.0, got {weight_total}")

# --- Load data --------------------------------------------------------------
data_dir = Path('..').resolve() / 'data' / 'processed'
X_train = pd.read_csv(data_dir / 'multi_X_train.csv')
# .values.ravel() flattens the target CSV to 1-D.
# NOTE(review): assumes each y file holds a single target column — confirm.
y_train = pd.read_csv(data_dir / 'multi_y_train.csv').values.ravel()
X_test = pd.read_csv(data_dir / 'multi_X_test.csv')
y_test = pd.read_csv(data_dir / 'multi_y_test.csv').values.ravel()
feature_names = X_train.columns.tolist()

# --- Initialize models ------------------------------------------------------
ridge_model = LinearRegressionModel(model_type='ridge', random_state=42)
rf_model = RandomForestModel(random_state=42)
xgb_model = XGBoostModel(random_state=42)

# --- Train ------------------------------------------------------------------
# All three models are trained with identical arguments, in the order
# Ridge -> Random Forest -> XGBoost. tune_hyperparams=False is passed for speed.
# NOTE(review): the original comment assumes tuning was already done and that
# the model classes carry the best params — confirm in src.models.
print("\nTraining base models...")
train_kwargs = dict(feature_names=feature_names, tune_hyperparams=False, verbose=0)
for base_model in (ridge_model, rf_model, xgb_model):
    base_model.train(X_train.values, y_train, **train_kwargs)

# --- Predict ----------------------------------------------------------------
print("\nGenerating predictions...")
ridge_pred = ridge_model.predict(X_test.values)
rf_pred = rf_model.predict(X_test.values)
xgb_pred = xgb_model.predict(X_test.values)

# --- Weighted ensemble ------------------------------------------------------
# Same term order as before (XGB, RF, Ridge) so the floating-point result is
# unchanged.
ensemble_pred = (
    ENSEMBLE_WEIGHTS['xgb'] * xgb_pred
    + ENSEMBLE_WEIGHTS['rf'] * rf_pred
    + ENSEMBLE_WEIGHTS['ridge'] * ridge_pred
)

# Guard against silently misaligned targets/predictions before scoring.
if len(ensemble_pred) != len(y_test):
    raise ValueError(
        f"Prediction/target length mismatch: {len(ensemble_pred)} predictions "
        f"vs {len(y_test)} targets"
    )

# --- Evaluate ---------------------------------------------------------------
metrics = compute_regression_metrics(y_test, ensemble_pred)
print("\n[Ensemble Model] Test Set Performance:")
print(f"  R² Score:  {metrics['R2']:.4f}")
print(f"  RMSE:      {metrics['RMSE']:.4f}")

# --- Save results -----------------------------------------------------------
results = {
    'model_name': 'Ensemble (Paper A)',
    'predictions': ensemble_pred,
    'metrics': metrics
}
# Build the output path once so the file written and the path reported cannot
# drift apart. Pickle files should only be loaded from trusted sources.
output_path = data_dir / 'model_04_ensemble.pkl'
with open(output_path, 'wb') as f:
    pickle.dump(results, f)
print(f"\n✓ Results saved to {output_path}")

MODEL 4: ENSEMBLE METHOD (Literature - Paper A)
Approach: Weighted ensemble of Ridge, Random Forest, and XGBoost

Training base models...



Generating predictions...

[Ensemble Model] Test Set Performance:
  R² Score:  0.9059
  RMSE:      0.1243

✓ Results saved to /Users/himanshishrivas/Documents/IntroMLCapstone/data/processed/model_04_ensemble.pkl
