# Market Predictor: Ensemble Training

This notebook develops and optimizes ensemble models that combine our previously trained base models. It covers the following steps:
1. Loading Models and Data
2. Ensemble Architecture Design
3. Ensemble Optimization
4. Final Model Evaluation
5. Model Export and Next Steps

## Setup and Configuration

In [None]:
# Standard library
import copy
import json
import sys
from datetime import datetime, timedelta

# Third-party libraries
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
from sklearn.metrics import auc, classification_report, confusion_matrix, roc_curve
from sklearn.model_selection import TimeSeriesSplit

# Add project root to path (must run before the project imports below)
sys.path.append('..')

# Import project modules
from src.models import (
    ModelFactory,
    EnsembleModel,
    create_ensemble
)
from src.utils import (
    setup_project_logger,
    ModelMetrics,
    TradingMetrics
)
from config import Config, load_validated_config

# Plotting settings
plt.style.use('seaborn')
%matplotlib inline
sns.set_theme(style="whitegrid")

# Setup logging
logger = setup_project_logger('ensemble_training')

## 1. Loading Models and Data

Load the previously trained base models and prepare data for ensemble training:
- Load optimized base models
- Load processed data
- Prepare predictions from base models

In [None]:
# Project configuration (validated on load)
config = load_validated_config('config/parameters.yaml')

# Scaled feature matrices, one per split
train_scaled = pd.read_parquet('data/processed/train_scaled.parquet')
val_scaled = pd.read_parquet('data/processed/val_scaled.parquet')
test_scaled = pd.read_parquet('data/processed/test_scaled.parquet')

# Matching targets, one per split
train_target = pd.read_parquet('data/processed/train_target.parquet')
val_target = pd.read_parquet('data/processed/val_target.parquet')
test_target = pd.read_parquet('data/processed/test_target.parquet')

# Restore every optimized base model and cache its class probabilities
# on the train / val / test feature sets.
model_names = ['random_forest', 'xgboost', 'lightgbm']
base_models = {}
base_predictions = {}
feature_splits = {'train': train_scaled, 'val': val_scaled, 'test': test_scaled}

for model_name in model_names:
    loaded_model = joblib.load(f'models/{model_name}_optimized.joblib')
    base_models[model_name] = loaded_model
    base_predictions[model_name] = {
        split: loaded_model.predict_proba(features)
        for split, features in feature_splits.items()
    }

# Report validation-set performance of each base model
print("\nBase Model Performances (Validation Set):")
print("=" * 50)

report_rows = (
    ('Accuracy', 'accuracy'),
    ('F1 Score', 'f1'),
    ('Precision', 'precision'),
    ('Recall', 'recall'),
)

for model_name, split_probs in base_predictions.items():
    val_probs = split_probs['val']
    model_metrics = ModelMetrics.classification_metrics(
        val_target,
        np.argmax(val_probs, axis=1),  # hard class labels from the probabilities
        val_probs
    )

    print(f"\n{model_name.upper()}:")
    for label, key in report_rows:
        print(f"{label}: {model_metrics[key]:.4f}")

# Correlation between the base models' validation probabilities
# (column index 1 of each probability array).
second_column_probs = {}
for name, split_probs in base_predictions.items():
    second_column_probs[name] = split_probs['val'][:, 1]
val_predictions_df = pd.DataFrame(second_column_probs)

heatmap_style = dict(annot=True, cmap='coolwarm', center=0, vmin=-1, vmax=1)

plt.figure(figsize=(8, 6))
sns.heatmap(val_predictions_df.corr(), **heatmap_style)
plt.title('Base Model Prediction Correlations')
plt.tight_layout()
plt.show()

logger.info('Base models and data loaded successfully')

## 2. Ensemble Architecture Design

Design and implement different ensemble architectures:
- Voting Ensemble
- Weighted Ensemble
- Stacking Ensemble

Then compare their initial performance.

In [None]:
# Define ensemble configurations
ensemble_configs = {
    'voting': {
        'type': 'voting',
        'params': {
            'voting': 'soft',
            'weights': None
        }
    },
    'weighted': {
        'type': 'weighted',
        'params': {
            'voting': 'soft',
            'weights': None,  # Will be optimized
            'dynamic_weights': True
        }
    },
    'stacking': {
        'type': 'stacking',
        'params': {
            'meta_model': 'lightgbm',
            'use_features': True,
            'cv': 5
        }
    }
}

# Initialize ensembles
ensembles = {}
ensemble_predictions = {}
ensemble_val_metrics = {}

# The loop variable is deliberately NOT called `config`: that name holds the
# project configuration loaded earlier in the notebook and must not be
# overwritten by the last ensemble spec.
for name, ensemble_config in ensemble_configs.items():
    print(f"\nInitializing {name} ensemble...")

    # Create and store ensemble
    ensembles[name] = create_ensemble(
        config=ensemble_config,
        base_models=base_models,
        model_names=model_names
    )

# Train and evaluate each ensemble
for name, ensemble in ensembles.items():
    print(f"\nTraining {name} ensemble...")

    # Train ensemble, handing over the cached base-model probabilities
    ensemble.train(
        X_train=train_scaled,
        y_train=train_target,
        X_val=val_scaled,
        y_val=val_target,
        base_predictions={
            'train': {model: preds['train'] for model, preds in base_predictions.items()},
            'val': {model: preds['val'] for model, preds in base_predictions.items()}
        }
    )

    # Get predictions
    train_preds = ensemble.predict_proba(train_scaled)
    val_preds = ensemble.predict_proba(val_scaled)

    ensemble_predictions[name] = {
        'train': train_preds,
        'val': val_preds
    }

    # Calculate metrics
    train_metrics = ModelMetrics.classification_metrics(
        train_target,
        np.argmax(train_preds, axis=1),
        train_preds
    )

    val_metrics = ModelMetrics.classification_metrics(
        val_target,
        np.argmax(val_preds, axis=1),
        val_preds
    )

    # Keep the validation metrics so the comparison plot below can reuse them
    # instead of recomputing the same values.
    ensemble_val_metrics[name] = val_metrics

    print(f"\n{name.upper()} ENSEMBLE RESULTS:")
    print("\nTraining Metrics:")
    print(f"Accuracy: {train_metrics['accuracy']:.4f}")
    print(f"F1 Score: {train_metrics['f1']:.4f}")

    print("\nValidation Metrics:")
    print(f"Accuracy: {val_metrics['accuracy']:.4f}")
    print(f"F1 Score: {val_metrics['f1']:.4f}")

# Plot ensemble performances comparison (long format: one row per
# ensemble / metric pair, as expected by seaborn's `hue` grouping)
metric_names = ['accuracy', 'precision', 'recall', 'f1']
comparison_data = [
    {
        'Ensemble': ensemble_name,
        'Metric': metric,
        'Value': ensemble_val_metrics[ensemble_name][metric]
    }
    for ensemble_name in ensembles
    for metric in metric_names
]

comparison_df = pd.DataFrame(comparison_data)

# Create comparison plot
plt.figure(figsize=(12, 6))
sns.barplot(
    data=comparison_df,
    x='Metric',
    y='Value',
    hue='Ensemble'
)
plt.title('Ensemble Models Performance Comparison')
plt.ylim(0, 1)
plt.tight_layout()
plt.show()

logger.info('Ensemble architectures evaluated')

## 3. Ensemble Optimization

Optimize the best-performing ensemble:
- Weight optimization
- Meta-model tuning
- Cross-validation analysis

In [None]:
# Rank the trained ensembles by their validation F1 score
val_scores = {}
for name in ensembles:
    candidate_probs = ensemble_predictions[name]['val']
    candidate_metrics = ModelMetrics.classification_metrics(
        val_target,
        np.argmax(candidate_probs, axis=1),
        candidate_probs
    )
    val_scores[name] = candidate_metrics['f1']

# The first ensemble reaching the highest F1 wins
best_ensemble_name = max(val_scores, key=lambda candidate: val_scores[candidate])
print(f"Best performing ensemble: {best_ensemble_name}")

# Optimize weights for the best ensemble
def optimize_ensemble_weights(ensemble, X, y, base_preds, n_trials=50, random_state=None):
    """Random-search the ensemble weights that give the highest F1 on ``(X, y)``.

    Each trial draws a weight vector from a flat Dirichlet distribution
    (non-negative, sums to 1), applies it with ``ensemble.set_weights`` and
    scores ``ensemble.predict_proba(X)`` via
    ``ModelMetrics.classification_metrics(...)['f1']``.

    Args:
        ensemble: Object exposing ``set_weights(weights)`` and
            ``predict_proba(X)``.
        X: Features the candidate weights are scored on.
        y: True labels for ``X``.
        base_preds: Collection with one entry per base model; only its length
            is used, to size the weight vector.
            NOTE(review): assumed to be the per-model prediction mapping the
            notebook passes in -- confirm for any other caller.
        n_trials: Number of random weight vectors to try.
        random_state: Optional seed for reproducible draws. ``None`` keeps the
            previous behaviour of drawing from NumPy's global random state.

    Returns:
        ``(best_weights, best_score)``. ``best_weights`` is ``None`` only when
        ``n_trials`` is less than 1; otherwise the first trial is always
        accepted, so a run in which no trial scores above zero still returns
        usable weights.
    """
    n_models = len(base_preds)
    rng = np.random if random_state is None else np.random.default_rng(random_state)

    best_score = 0
    best_weights = None

    for _ in range(n_trials):
        # Generate random weights
        weights = rng.dirichlet(np.ones(n_models))

        # Update ensemble weights
        ensemble.set_weights(weights)

        # Get predictions
        preds = ensemble.predict_proba(X)
        class_preds = np.argmax(preds, axis=1)

        # Calculate score
        metrics = ModelMetrics.classification_metrics(y, class_preds, preds)
        score = metrics['f1']

        # Accept the first trial unconditionally, and let a valid score
        # replace a NaN incumbent, so the result is never stuck at None.
        if best_weights is None or np.isnan(best_score) or score > best_score:
            best_score = score
            best_weights = weights

    # Leave the ensemble on the best weights found rather than on whatever
    # the final random trial happened to draw.
    if best_weights is not None:
        ensemble.set_weights(best_weights)

    return best_weights, best_score

# Tune the winning ensemble's weights against the validation split
best_ensemble = ensembles[best_ensemble_name]
print("\nOptimizing ensemble weights...")

# Validation-set probabilities of every base model, keyed by model name
val_base_preds = {}
for model, preds in base_predictions.items():
    val_base_preds[model] = preds['val']

optimal_weights, optimal_score = optimize_ensemble_weights(
    ensemble=best_ensemble,
    X=val_scaled,
    y=val_target,
    base_preds=val_base_preds
)

print(f"\nOptimal weights found (F1: {optimal_score:.4f}):")
for model_name, weight in zip(model_names, optimal_weights):
    print(f"{model_name}: {weight:.4f}")

# Apply the winning weight vector to the ensemble
best_ensemble.set_weights(optimal_weights)

# Perform cross-validation analysis
from sklearn.model_selection import TimeSeriesSplit

def cv_analysis(ensemble, X, y, n_splits=5):
    """Score ``ensemble`` with time-ordered cross-validation, without mutating it.

    ``TimeSeriesSplit(n_splits)`` yields the train/validation index pairs.
    For each fold a deep copy of ``ensemble`` is trained on the training rows
    and evaluated on the validation rows with
    ``ModelMetrics.classification_metrics``.

    Training a copy matters: the notebook later evaluates and saves the very
    object passed in here, so refitting it in place would silently replace
    the tuned model with one fitted on only the last fold's training rows.

    Args:
        ensemble: Object exposing ``train(X_train, y_train, X_val, y_val)``
            and ``predict_proba(X)``.
            NOTE(review): assumed to be deep-copyable (it is already persisted
            with joblib elsewhere in the notebook) -- confirm.
        X: Features; must support positional ``.iloc`` indexing.
        y: Targets; must support positional ``.iloc`` indexing.
        n_splits: Number of folds passed to ``TimeSeriesSplit``.

    Returns:
        List with one metrics object per fold, in fold order.
    """
    tscv = TimeSeriesSplit(n_splits=n_splits)
    cv_scores = []

    for train_idx, val_idx in tscv.split(X):
        # Split data
        cv_X_train, cv_X_val = X.iloc[train_idx], X.iloc[val_idx]
        cv_y_train, cv_y_val = y.iloc[train_idx], y.iloc[val_idx]

        # Train a throw-away copy so the caller's ensemble stays untouched
        fold_ensemble = copy.deepcopy(ensemble)
        fold_ensemble.train(cv_X_train, cv_y_train, cv_X_val, cv_y_val)

        # Get predictions
        val_preds = fold_ensemble.predict_proba(cv_X_val)
        val_class_preds = np.argmax(val_preds, axis=1)

        # Calculate metrics
        metrics = ModelMetrics.classification_metrics(
            cv_y_val,
            val_class_preds,
            val_preds
        )
        cv_scores.append(metrics)

    return cv_scores

# Run time-series cross-validation for the selected ensemble
cv_scores = cv_analysis(best_ensemble, train_scaled, train_target)

# Flatten the per-fold metrics into long format (one row per fold / metric)
cv_metrics = ['accuracy', 'precision', 'recall', 'f1']
cv_results = [
    {
        'Fold': fold_number,
        'Metric': metric_name,
        'Score': fold_scores[metric_name]
    }
    for fold_number, fold_scores in enumerate(cv_scores, start=1)
    for metric_name in cv_metrics
]

cv_df = pd.DataFrame(cv_results)

# Distribution of each metric across folds
plt.figure(figsize=(12, 6))
sns.boxplot(data=cv_df, x='Metric', y='Score')
plt.title('Cross-validation Results')
plt.ylim(0, 1)
plt.tight_layout()
plt.show()

logger.info('Ensemble optimization completed')

## 4. Final Model Evaluation

Comprehensive evaluation of the optimized ensemble model:
- Performance metrics
- Trading metrics
- Risk analysis
- Comparison with base models

In [None]:
def _json_default(obj):
    """Convert NumPy / pandas values that ``json`` cannot encode natively.

    Used as the ``default=`` hook when serialising results, so it is only
    consulted for objects the encoder would otherwise reject.
    """
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, (np.ndarray, pd.Series, pd.Index)):
        return obj.tolist()
    raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')


# Generate predictions with optimized ensemble
final_predictions = {
    'train': best_ensemble.predict_proba(train_scaled),
    'val': best_ensemble.predict_proba(val_scaled),
    'test': best_ensemble.predict_proba(test_scaled)
}

class_predictions = {
    dataset: np.argmax(preds, axis=1)
    for dataset, preds in final_predictions.items()
}

# Calculate comprehensive metrics
datasets = {
    'train': (train_target, train_scaled),
    'val': (val_target, val_scaled),
    'test': (test_target, test_scaled)
}

final_metrics = {}
for dataset_name, (y_true, X) in datasets.items():
    y_pred = class_predictions[dataset_name]
    y_prob = final_predictions[dataset_name]

    # Classification metrics
    class_metrics = ModelMetrics.classification_metrics(y_true, y_pred, y_prob)

    # Trading metrics
    # TODO: replace this all-NaN placeholder with actual returns; until then
    # the trading metrics below carry no information.
    # The dtype is explicit because a Series built without data otherwise
    # gets a pandas-version-dependent default dtype.
    returns = pd.Series(np.nan, index=X.index, dtype='float64')
    trade_metrics = TradingMetrics.calculate_metrics(returns, y_pred)

    final_metrics[dataset_name] = {
        'classification': class_metrics,
        'trading': trade_metrics
    }

# Plot comprehensive results
fig = plt.figure(figsize=(15, 10))

# Classification metrics plot
plt.subplot(2, 2, 1)
class_data = []
for dataset, metrics in final_metrics.items():
    for metric, value in metrics['classification'].items():
        class_data.append({
            'Dataset': dataset,
            'Metric': metric,
            'Value': value
        })
class_df = pd.DataFrame(class_data)
sns.barplot(data=class_df, x='Metric', y='Value', hue='Dataset')
plt.title('Classification Metrics')
plt.xticks(rotation=45)

# Trading metrics plot
plt.subplot(2, 2, 2)
trade_data = []
for dataset, metrics in final_metrics.items():
    for metric, value in metrics['trading'].items():
        trade_data.append({
            'Dataset': dataset,
            'Metric': metric,
            'Value': value
        })
trade_df = pd.DataFrame(trade_data)
sns.barplot(data=trade_df, x='Metric', y='Value', hue='Dataset')
plt.title('Trading Metrics')
plt.xticks(rotation=45)

# Confusion matrix plot
plt.subplot(2, 2, 3)
cm = confusion_matrix(test_target, class_predictions['test'])
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Test Set Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')

# ROC curve plot (uses column index 1 of the test-set probabilities);
# roc_curve / auc come from the notebook's top-level import cell
plt.subplot(2, 2, 4)
fpr, tpr, _ = roc_curve(test_target, final_predictions['test'][:, 1])
roc_auc = auc(fpr, tpr)
plt.plot(fpr, tpr, label=f'ROC curve (AUC = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.title('ROC Curve (Test Set)')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')

plt.tight_layout()
plt.show()

# Save final model and results
joblib.dump(best_ensemble, 'models/final_ensemble.joblib')

final_results = {
    'model_type': best_ensemble_name,
    'optimal_weights': optimal_weights.tolist(),
    'metrics': final_metrics,
    'cv_results': cv_results,
    'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
}

# Serialise to a string first: if a value cannot be encoded, the error is
# raised before the results file is opened, so no truncated JSON is left behind.
final_results_json = json.dumps(final_results, indent=4, default=_json_default)
with open('models/final_ensemble_results.json', 'w') as f:
    f.write(final_results_json)

logger.info('Final model evaluation completed')

## 5. Model Export and Next Steps

Prepare the final ensemble model for deployment:
- Export model artifacts
- Document model usage
- Define monitoring metrics
- Outline deployment steps

In [None]:
def _json_default(obj):
    """Convert NumPy / pandas values that ``json`` cannot encode natively.

    Used as the ``default=`` hook when serialising the deployment package, so
    it is only consulted for objects the encoder would otherwise reject.
    """
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, (np.ndarray, pd.Series, pd.Index)):
        return obj.tolist()
    raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')


# Create model deployment package
deployment_package = {
    'model': {
        'ensemble_path': 'models/final_ensemble.joblib',
        # Point at the same artifacts the ensemble was built from: the base
        # models are loaded from 'models/<name>_optimized.joblib' earlier in
        # this notebook.
        'base_models': {
            name: f'models/{name}_optimized.joblib'
            for name in model_names
        },
        'scaler_path': 'models/feature_scaler.joblib'
    },
    'metadata': {
        'creation_date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'features': train_scaled.columns.tolist(),
        'model_version': '1.0.0',
        'python_version': sys.version,
        'required_packages': {
            'numpy': np.__version__,
            'pandas': pd.__version__,
            'scikit-learn': sklearn.__version__
        }
    },
    'performance': final_metrics,
    'configuration': {
        'ensemble_type': best_ensemble_name,
        'model_weights': optimal_weights.tolist(),
        'prediction_threshold': 0.5,
        'update_frequency': '24h'
    }
}

# Save deployment package. Serialise to a string first so an encoding error
# is raised before the file is opened and cannot leave a truncated JSON file.
deployment_package_json = json.dumps(deployment_package, indent=4, default=_json_default)
with open('models/deployment_package.json', 'w') as f:
    f.write(deployment_package_json)

# Create model usage example
# (the generated script imports joblib itself, since it calls joblib.load)
usage_example = """
# Model Usage Example
from src.models import load_ensemble_model
import joblib
import pandas as pd

# Load model and dependencies
model = load_ensemble_model('models/final_ensemble.joblib')
scaler = joblib.load('models/feature_scaler.joblib')

# Prepare features
def prepare_features(data):
    # Add feature preparation code
    return processed_features

# Make predictions
def predict_market_direction(data):
    features = prepare_features(data)
    scaled_features = scaler.transform(features)
    predictions = model.predict_proba(scaled_features)
    return predictions
"""

with open('models/usage_example.py', 'w') as f:
    f.write(usage_example)

# Print deployment checklist
print("\nDeployment Checklist:")
print("=" * 50)
print("1. Model Artifacts:")
print("   - Final ensemble model saved")
print("   - Base models saved")
print("   - Feature scaler saved")
print("   - Model metadata documented")

print("\n2. Performance Validation:")
print("   - Cross-validation completed")
print("   - Test set performance verified")
print("   - Trading metrics calculated")

print("\n3. Monitoring Setup:")
print("   - Performance metrics defined")
print("   - Alert thresholds established")
print("   - Logging configured")

print("\n4. Next Steps:")
print("   - Set up model API")
print("   - Implement real-time data pipeline")
print("   - Configure monitoring dashboard")
print("   - Establish retraining schedule")

logger.info('Model export and deployment preparation completed')

## Next Steps

1. Model Deployment:
   - Set up model serving infrastructure
   - Implement API endpoints
   - Configure monitoring and alerting
   - Establish backup and failover procedures

2. Production Pipeline:
   - Real-time data collection
   - Automated feature generation
   - Prediction scheduling
   - Results logging and analysis

3. Monitoring and Maintenance:
   - Performance monitoring dashboard
   - Data drift detection
   - Model retraining triggers
   - Alert configuration

4. Future Improvements:
   - Additional data sources
   - Feature engineering refinements
   - Model architecture experiments
   - Risk management enhancements

Proceed to `05_backtesting.ipynb` for comprehensive strategy testing.