# Lab 1.6.4: Baseline Comparison Framework - SOLUTIONS

This notebook contains complete solutions to all exercises from Lab 1.6.4.

In [None]:
# Setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from time import time
from typing import Dict, List, Any, Optional, Callable
from dataclasses import dataclass
import warnings
warnings.filterwarnings('ignore')

from sklearn.datasets import load_breast_cancer, fetch_california_housing
from sklearn.model_selection import train_test_split, cross_val_score, KFold, StratifiedKFold
from sklearn.metrics import accuracy_score, mean_squared_error, f1_score
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.preprocessing import StandardScaler
import xgboost as xgb

np.random.seed(42)

# Plotting style with fallback for older matplotlib versions:
# try the current style name first, then the legacy one, and silently keep
# matplotlib's default style if neither is registered.
for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    try:
        plt.style.use(style_name)
        break
    except OSError:
        continue

# The check mark was stored as mojibake (UTF-8 bytes read as cp1252); restored here.
print("✅ Setup complete!")

## Exercise 1 Solution: Add More Models (LightGBM + MLP)

In [None]:
# Exercise 1: Extended BaselineExperiment with more models

try:
    import lightgbm as lgb
    LGBM_AVAILABLE = True
except ImportError:
    LGBM_AVAILABLE = False

from sklearn.neural_network import MLPClassifier, MLPRegressor

# Breast cancer dataset: float32 features, 80/20 train/test split with a fixed seed
data = load_breast_cancer()
X = data.data.astype(np.float32)
y = data.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Model registry entries: (display name, estimator, needs standardized inputs?)
# The order here is the insertion order of `models` below.
model_specs = [
    (
        'XGBoost',
        xgb.XGBClassifier(
            n_estimators=100, max_depth=6, device='cuda',
            random_state=42, verbosity=0
        ),
        False,
    ),
    (
        'Random Forest',
        RandomForestClassifier(
            n_estimators=100, max_depth=16, n_jobs=-1, random_state=42
        ),
        False,
    ),
    (
        'Logistic Regression',
        LogisticRegression(max_iter=1000, random_state=42, n_jobs=-1),
        True,  # needs scaling
    ),
    (
        'MLP Neural Network',
        MLPClassifier(
            hidden_layer_sizes=(100, 50, 25),
            max_iter=500,
            early_stopping=True,
            random_state=42
        ),
        True,  # needs scaling
    ),
]

# LightGBM is optional; register it last and only when the import succeeded
if LGBM_AVAILABLE:
    model_specs.append((
        'LightGBM',
        lgb.LGBMClassifier(
            n_estimators=100, max_depth=6, device='gpu',
            random_state=42, verbose=-1
        ),
        False,
    ))

# name -> (estimator, needs_scaling)
models = {
    spec_name: (estimator, scale_flag)
    for spec_name, estimator, scale_flag in model_specs
}

# Standardize using statistics learned from the training split only
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Run comparison: fit every registered model, score it on the held-out split
print("Extended Model Comparison")
print("=" * 60)

results = []
for name, (model, needs_scaling) in models.items():
    print(f"\nTraining: {name}")

    # Models flagged as needing scaling get the standardized matrices
    if needs_scaling:
        X_tr, X_te = X_train_scaled, X_test_scaled
    else:
        X_tr, X_te = X_train, X_test

    # Only the fit call is timed
    fit_started = time()
    model.fit(X_tr, y_train)
    train_time = time() - fit_started

    # Held-out predictions and scores
    y_pred = model.predict(X_te)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')

    row = {
        'Model': name,
        'Accuracy': acc,
        'F1 Score': f1,
        'Train Time (s)': train_time
    }
    results.append(row)

    print(f"  Accuracy: {acc:.4f}, F1: {f1:.4f}, Time: {train_time:.3f}s")

# Summary table, best accuracy first
print("\n" + "=" * 60)
print("Summary (sorted by Accuracy):")
results_df = pd.DataFrame(results).sort_values('Accuracy', ascending=False)
print(results_df.to_string(index=False))

## Exercise 2 Solution: Custom Metrics Support

In [None]:
# Exercise 2: Custom profit-based metric

def profit_metric(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """
    Custom profit metric for a business application.

    Scenario: Fraud detection (label 1 = fraud, label 0 = legitimate)
    - True Positive (caught fraud): +$100 (saved money)
    - False Positive (false alarm): -$50 (investigation cost)
    - False Negative (missed fraud): -$200 (lost money)
    - True Negative: $0 (no action needed)

    Args:
        y_true: Ground-truth binary labels (any array-like of 0/1).
        y_pred: Predicted binary labels with the same shape as y_true.

    Returns:
        Profit per prediction on average; 0.0 when there are no samples.

    Raises:
        ValueError: If y_true and y_pred have different shapes.
    """
    # Coerce to arrays: comparing a plain list with `== 1` yields a single
    # False instead of an element-wise mask, which would silently score 0.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {y_true.shape} and {y_pred.shape}"
        )

    # Nothing to average over; avoid a 0/0 division producing nan
    if y_true.size == 0:
        return 0.0

    tp = np.sum((y_true == 1) & (y_pred == 1))
    fp = np.sum((y_true == 0) & (y_pred == 1))
    fn = np.sum((y_true == 1) & (y_pred == 0))
    # True negatives contribute $0, so they do not appear in the total.

    total_profit = 100 * tp - 50 * fp - 200 * fn
    return float(total_profit / len(y_true))

def specificity_metric(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """
    True Negative Rate (Specificity): TN / (TN + FP).

    Args:
        y_true: Ground-truth binary labels (any array-like of 0/1).
        y_pred: Predicted binary labels aligned with y_true.

    Returns:
        Fraction of actual negatives predicted as negative; 0.0 when no
        sample counts as a true negative or false positive.
    """
    # Coerce to arrays so list inputs are compared element-wise
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    tn = np.sum((y_true == 0) & (y_pred == 0))
    fp = np.sum((y_true == 0) & (y_pred == 1))
    denominator = tn + fp
    if denominator == 0:
        return 0.0
    return float(tn / denominator)

# Extended comparison with custom metrics
print("Model Comparison with Custom Metrics")
print("=" * 70)

# Top 3 models by baseline accuracy. results_df is sorted best-first, so its
# first three rows are the real top three; slicing `models` directly would
# only return the first three entries in insertion order.
top_model_names = results_df['Model'].head(3).tolist()

custom_results = []

for name in top_model_names:
    model, needs_scaling = models[name]
    X_tr = X_train_scaled if needs_scaling else X_train
    X_te = X_test_scaled if needs_scaling else X_test

    model.fit(X_tr, y_train)
    y_pred = model.predict(X_te)

    custom_results.append({
        'Model': name,
        'Accuracy': accuracy_score(y_test, y_pred),
        'Profit per Sample': profit_metric(y_test, y_pred),
        'Specificity': specificity_metric(y_test, y_pred)
    })

custom_df = pd.DataFrame(custom_results)
print(custom_df.to_string(index=False))

print("\nInterpretation:")
print("  - Best accuracy model may not be best for business profit!")
print("  - Specificity matters when false positives are costly.")
print("  - Always align metrics with business objectives.")

## Exercise 3 Solution: Automatic Hyperparameter Tuning Integration

In [None]:
# Exercise 3: Auto-tuning the best model

import optuna
optuna.logging.set_verbosity(optuna.logging.WARNING)

def tune_best_model(
    X_train: np.ndarray,
    y_train: np.ndarray,
    X_test: np.ndarray,
    y_test: np.ndarray,
    best_model_name: str,
    n_trials: int = 30,
    random_state: int = 42
) -> Dict:
    """
    Automatically tune the best-performing model using Optuna.

    Every trial samples hyperparameters and is scored by mean 5-fold
    cross-validated accuracy on the training split. The best trial's
    parameters are then used to fit a final model on the whole training
    split, which is scored once on the test split.

    Args:
        X_train, y_train: Training split (cross-validation and final fit).
        X_test, y_test: Held-out split, used only to score the final model.
        best_model_name: 'XGBoost' or 'Random Forest'.
        n_trials: Number of Optuna trials to run.
        random_state: Seed passed to the Optuna sampler and to the
            estimators so that tuning runs are repeatable.

    Returns:
        Dict with keys 'model' (fitted estimator), 'best_params' (sampled
        plus fixed parameters), 'cv_score' (best mean CV accuracy),
        'test_accuracy' and 'n_trials'.

    Raises:
        ValueError: If no search space is defined for best_model_name.
    """
    def _suggest_xgboost(trial):
        # Search space for the XGBoost classifier
        return {
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
            'n_estimators': trial.suggest_int('n_estimators', 50, 300),
            'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        }

    def _suggest_random_forest(trial):
        # Search space for the random forest classifier
        return {
            'n_estimators': trial.suggest_int('n_estimators', 50, 300),
            'max_depth': trial.suggest_int('max_depth', 5, 30),
            'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
            'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
        }

    search_spaces = {
        'XGBoost': _suggest_xgboost,
        'Random Forest': _suggest_random_forest,
    }

    # Fail fast, before announcing a tuning run that cannot happen
    if best_model_name not in search_spaces:
        raise ValueError(f"Tuning not implemented for {best_model_name}")

    print(f"\nAuto-tuning {best_model_name} with Optuna ({n_trials} trials)...")

    # Estimator class plus the parameters that are held fixed (not searched)
    if best_model_name == 'XGBoost':
        model_cls = xgb.XGBClassifier
        fixed_params = {'device': 'cuda', 'random_state': random_state, 'verbosity': 0}
    else:
        model_cls = RandomForestClassifier
        fixed_params = {'n_jobs': -1, 'random_state': random_state}
    suggest_params = search_spaces[best_model_name]

    def objective(trial):
        model = model_cls(**suggest_params(trial), **fixed_params)
        cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
        return cv_scores.mean()

    # Run optimization with a seeded sampler; the default sampler is unseeded,
    # which makes every run explore different hyperparameters.
    study = optuna.create_study(
        direction='maximize',
        sampler=optuna.samplers.TPESampler(seed=random_state)
    )
    study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

    # Get best parameters and train final model
    best_params = {**study.best_params, **fixed_params}
    tuned_model = model_cls(**best_params)
    tuned_model.fit(X_train, y_train)
    y_pred = tuned_model.predict(X_test)
    tuned_accuracy = accuracy_score(y_test, y_pred)

    return {
        'model': tuned_model,
        'best_params': best_params,
        'cv_score': study.best_value,
        'test_accuracy': tuned_accuracy,
        'n_trials': n_trials
    }

# Example usage
print("Baseline Comparison + Auto-Tuning")
print("=" * 60)

# tune_best_model only defines search spaces for these baselines; handing it
# any other name (e.g. Logistic Regression, should it top the table) raises
# ValueError and would abort the cell.
TUNABLE_MODELS = ['XGBoost', 'Random Forest']

# First, find best model (results_df is sorted by accuracy, best first)
overall_best = results_df.iloc[0]
print(f"Best baseline model: {overall_best['Model']} (Accuracy: {overall_best['Accuracy']:.4f})")

# Restrict to baselines that can actually be tuned, keeping the accuracy order
tunable_df = results_df[results_df['Model'].isin(TUNABLE_MODELS)]
best_name = tunable_df.iloc[0]['Model']
best_baseline_acc = tunable_df.iloc[0]['Accuracy']
if best_name != overall_best['Model']:
    print(f"No tuner available for {overall_best['Model']}; "
          f"tuning best tunable baseline instead: {best_name} (Accuracy: {best_baseline_acc:.4f})")

# Auto-tune it
tuning_result = tune_best_model(X_train, y_train, X_test, y_test, best_name, n_trials=30)

print(f"\nTuning Results:")
print(f"  CV Score: {tuning_result['cv_score']:.4f}")
print(f"  Test Accuracy: {tuning_result['test_accuracy']:.4f}")
print(f"  Improvement: {(tuning_result['test_accuracy'] - best_baseline_acc)*100:.2f}%")
print(f"\nBest Parameters:")
for key, val in tuning_result['best_params'].items():
    # Skip the fixed (non-searched) settings; only the tuned values are of interest
    if key not in ['device', 'random_state', 'verbosity', 'n_jobs']:
        print(f"  {key}: {val}")

## Complete Extended Framework

In [None]:
# Final: Complete extended BaselineExperiment class

@dataclass
class ExtendedModelResult:
    """Record of one model's evaluation, as built by ExtendedBaselineExperiment.run()."""
    # Display name of the model (its key in the experiment's model dict)
    name: str
    # Built-in scores keyed by metric name, e.g. {'accuracy': ...}
    standard_metrics: Dict[str, float]
    # User-registered metric results keyed by the name they were registered under
    custom_metrics: Dict[str, float]
    # Per-fold scores returned by cross_val_score on the training split
    cv_scores: np.ndarray
    # Wall-clock seconds spent in the model's fit call
    train_time: float
    # Tuning bookkeeping; run() leaves both at their defaults
    tuned: bool = False
    tuned_params: Optional[Dict] = None

class ExtendedBaselineExperiment:
    """
    Extended baseline framework with:
    - More models (LightGBM, MLP)
    - Custom metrics support
    - Auto-tuning integration (results carry `tuned` / `tuned_params`
      fields; run() itself does not tune and leaves them at their defaults)

    Typical use: construct, optionally add_custom_metric(), then
    add_all_models().run() and read the table from report().
    Only task='classification' has a model set.
    """

    def __init__(self, X, y, task='classification', custom_metrics=None):
        """
        Store the data, cast features to float32 and create the train/test split.

        Args:
            X: Feature matrix; must support .astype(np.float32).
            y: Target labels aligned with X.
            task: Kind of problem; add_all_models() supports 'classification'.
            custom_metrics: Optional mapping name -> func(y_true, y_pred).
        """
        self.X = X.astype(np.float32)
        self.y = y
        self.task = task
        # Copy so add_custom_metric() never mutates the caller's dict
        self.custom_metrics = dict(custom_metrics or {})
        self.models = {}
        self.results = []

        # Split data. Use the float32 copy rather than the raw X, otherwise
        # the cast above has no effect on what the models see.
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, test_size=0.2, random_state=42
        )

    def add_custom_metric(self, name: str, func: Callable):
        """Add a custom metric function, called as func(y_true, y_pred) during run()."""
        self.custom_metrics[name] = func

    def add_all_models(self):
        """
        Add extended set of models.

        Returns:
            self, so calls can be chained.

        Raises:
            ValueError: If self.task is not 'classification' (no model set is
                defined for other tasks; registering nothing would only fail
                later, in report()).
        """
        if self.task != 'classification':
            raise ValueError(
                f"No model set defined for task '{self.task}'; only 'classification' is supported"
            )

        # Local import: sklearn.pipeline is not among the notebook's setup imports.
        from sklearn.pipeline import make_pipeline

        # Seeds are fixed so repeated runs produce the same table. The linear
        # and neural models are wrapped with a StandardScaler because they are
        # scale-sensitive; inside a pipeline the scaler is re-fit on each
        # cross-validation training fold, so no held-out statistics leak in.
        self.models = {
            'XGBoost': xgb.XGBClassifier(
                n_estimators=100, device='cuda', random_state=42, verbosity=0
            ),
            'Random Forest': RandomForestClassifier(
                n_estimators=100, n_jobs=-1, random_state=42
            ),
            'Logistic Regression': make_pipeline(
                StandardScaler(),
                LogisticRegression(max_iter=1000, random_state=42)
            ),
            'MLP': make_pipeline(
                StandardScaler(),
                MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500, random_state=42)
            )
        }
        if LGBM_AVAILABLE:
            self.models['LightGBM'] = lgb.LGBMClassifier(
                n_estimators=100, random_state=42, verbose=-1
            )
        return self

    def run(self):
        """
        Run all models: fit, score on the test split, evaluate custom
        metrics and collect 5-fold CV scores on the training split.

        Returns:
            self, so calls can be chained.
        """
        # Start clean so calling run() again does not duplicate rows
        self.results = []

        for name, model in self.models.items():
            # Train (only the fit call is timed)
            start = time()
            model.fit(self.X_train, self.y_train)
            train_time = time() - start

            # Predict
            y_pred = model.predict(self.X_test)

            # Standard metrics
            std_metrics = {'accuracy': accuracy_score(self.y_test, y_pred)}

            # Custom metrics
            cust_metrics = {
                metric_name: metric_func(self.y_test, y_pred)
                for metric_name, metric_func in self.custom_metrics.items()
            }

            # CV scores
            cv_scores = cross_val_score(model, self.X_train, self.y_train, cv=5)

            self.results.append(ExtendedModelResult(
                name=name,
                standard_metrics=std_metrics,
                custom_metrics=cust_metrics,
                cv_scores=cv_scores,
                train_time=train_time
            ))
        return self

    def report(self):
        """
        Generate report DataFrame.

        Returns:
            One row per result with the model name, standard and custom
            metrics, mean CV score and train time, sorted by accuracy
            (best first). An empty DataFrame if run() has produced no results.
        """
        rows = [
            {'Model': r.name, **r.standard_metrics, **r.custom_metrics,
             'CV Mean': r.cv_scores.mean(), 'Train Time': r.train_time}
            for r in self.results
        ]
        report_df = pd.DataFrame(rows)
        # With no rows there is no 'accuracy' column to sort on
        if report_df.empty:
            return report_df
        return report_df.sort_values('accuracy', ascending=False)

# Demo: build the experiment, register the profit metric, then train and report
print("Extended Framework Demo")
print("=" * 60)

exp = ExtendedBaselineExperiment(X, y, task='classification')
exp.add_custom_metric('profit', profit_metric)
# add_all_models() and run() both return the experiment, so they chain
exp.add_all_models().run()

report_table = exp.report()
print(report_table.to_string(index=False))

## Key Takeaways

1. **Extensibility matters** - frameworks should support new models and metrics easily
2. **Custom metrics align ML with business goals** - accuracy isn't always the right target
3. **Auto-tuning can improve results** - but start simple and tune only the best baseline
4. **Reproducibility is key** - fixed seeds, documented splits, consistent evaluation