Lab 14: Model Evaluation and Tuning
This script demonstrates model evaluation metrics and hyperparameter tuning.

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer, load_iris
from sklearn.model_selection import (train_test_split, cross_val_score, 
                                     GridSearchCV, RandomizedSearchCV, learning_curve)
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (accuracy_score, precision_score, recall_score, 
                            f1_score, roc_auc_score, roc_curve, 
                            classification_report, confusion_matrix)
import seaborn as sns


In [None]:
def evaluate_classification_metrics():
    """Train a random forest on the breast-cancer data and report its metrics.

    The data is split 70/30, standardised with statistics learned from the
    training portion only, and the fitted model is scored with accuracy,
    precision, recall, F1 and ROC-AUC.  A classification report is printed
    and the confusion-matrix heatmap is written to
    ``lab14_confusion_matrix.png``.

    Returns:
        tuple: ``(y_test, y_pred_proba)`` -- the held-out labels and the
        second column of ``predict_proba`` for each held-out sample.
    """
    banner = "=" * 50
    print(banner)
    print("Classification Metrics")
    print(banner)

    dataset = load_breast_cancer()
    features, labels = dataset.data, dataset.target

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.3, random_state=42
    )

    # The scaler only ever sees the training split, so no test-set
    # statistics leak into the model.
    scaler = StandardScaler().fit(X_train)
    train_scaled = scaler.transform(X_train)
    test_scaled = scaler.transform(X_test)

    forest = RandomForestClassifier(n_estimators=100, random_state=42)
    forest.fit(train_scaled, y_train)

    y_pred = forest.predict(test_scaled)
    y_pred_proba = forest.predict_proba(test_scaled)[:, 1]

    # (label, value) pairs; labels are pre-padded so the numbers line up.
    metric_lines = [
        ("Accuracy:  ", accuracy_score(y_test, y_pred)),
        ("Precision: ", precision_score(y_test, y_pred)),
        ("Recall:    ", recall_score(y_test, y_pred)),
        ("F1-Score:  ", f1_score(y_test, y_pred)),
        ("ROC-AUC:   ", roc_auc_score(y_test, y_pred_proba)),
    ]
    print("\nEvaluation Metrics:")
    for label, value in metric_lines:
        print(f"{label}{value:.4f}")

    class_names = dataset.target_names
    print("\nDetailed Classification Report:")
    print(classification_report(y_test, y_pred, target_names=class_names))

    # Heatmap of true (rows) versus predicted (columns) classes.
    matrix = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names, ax=ax)
    ax.set_title('Confusion Matrix')
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')
    fig.tight_layout()
    fig.savefig('lab14_confusion_matrix.png')
    plt.close(fig)
    print("\nConfusion matrix saved as 'lab14_confusion_matrix.png'")

    return y_test, y_pred_proba


In [None]:
def plot_roc_curves(y_test, y_pred_proba):
    """Draw the ROC curve for a set of scores and save it to disk.

    Args:
        y_test: True labels, passed straight to ``roc_curve``.
        y_pred_proba: Predicted scores, one per label.

    The area under the curve is printed and the figure is written to
    ``lab14_roc_curve.png``.
    """
    separator = "=" * 50
    print("\n" + separator)
    print("ROC Curve Analysis")
    print(separator)

    # The thresholds returned by roc_curve are not needed for the plot.
    false_pos_rate, true_pos_rate, _ = roc_curve(y_test, y_pred_proba)
    auc_value = roc_auc_score(y_test, y_pred_proba)

    print(f"\nArea Under ROC Curve: {auc_value:.4f}")

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(false_pos_rate, true_pos_rate, color='darkorange', lw=2,
            label=f'ROC curve (AUC = {auc_value:.2f})')
    # Diagonal reference line, labelled 'Random' in the legend.
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Random')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Receiver Operating Characteristic (ROC) Curve')
    ax.legend(loc="lower right")
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    fig.savefig('lab14_roc_curve.png')
    plt.close(fig)
    print("\nROC curve saved as 'lab14_roc_curve.png'")


In [None]:
def cross_validation_evaluation():
    """Run 5-fold cross-validation of a random forest on the iris data.

    Prints the per-fold accuracies, their mean and standard deviation, and
    an approximate 95% confidence interval for the mean accuracy, then saves
    a bar chart of the fold scores to ``lab14_cross_validation.png``.

    The confidence interval is built from the standard error of the mean
    (sample standard deviation / sqrt(k)) and the Student-t critical value
    for k - 1 degrees of freedom.  ``mean +/- 1.96 * std`` would instead
    describe the spread of individual fold scores, not the uncertainty of
    the mean.  Fold scores share training data and are therefore not
    independent, so the interval is still only an approximation.
    """
    # SciPy is a hard dependency of scikit-learn, so it is always available
    # here; imported locally to keep this cell self-contained.
    from scipy import stats

    print("\n" + "=" * 50)
    print("Cross-Validation Evaluation")
    print("=" * 50)

    # Load dataset
    iris = load_iris()
    X = iris.data
    y = iris.target

    # Create model
    model = RandomForestClassifier(n_estimators=100, random_state=42)

    # Perform k-fold cross-validation
    cv_scores = cross_val_score(model, X, y, cv=5)

    n_folds = len(cv_scores)
    mean_score = cv_scores.mean()

    # Standard error of the mean uses the sample (ddof=1) standard deviation.
    std_error = cv_scores.std(ddof=1) / np.sqrt(n_folds)
    # With only a handful of folds the t distribution is noticeably wider
    # than the normal approximation (2.776 vs 1.96 for 4 degrees of freedom).
    t_critical = stats.t.ppf(0.975, df=n_folds - 1)
    margin = t_critical * std_error
    # Accuracy is bounded, so keep the reported interval inside [0, 1].
    ci_low = max(0.0, mean_score - margin)
    ci_high = min(1.0, mean_score + margin)

    print("\n5-Fold Cross-Validation Results:")
    print(f"Scores: {cv_scores}")
    print(f"Mean Accuracy: {mean_score:.4f}")
    print(f"Standard Deviation: {cv_scores.std():.4f}")
    print(f"95% Confidence Interval: [{ci_low:.4f}, {ci_high:.4f}]")

    # Visualize
    plt.figure(figsize=(10, 6))
    plt.bar(range(1, n_folds + 1), cv_scores, color='skyblue', edgecolor='black')
    plt.axhline(y=mean_score, color='r', linestyle='--',
                label=f'Mean: {mean_score:.4f}')
    plt.xlabel('Fold')
    plt.ylabel('Accuracy')
    plt.title('Cross-Validation Scores')
    plt.legend()
    plt.grid(True, alpha=0.3, axis='y')
    plt.tight_layout()
    plt.savefig('lab14_cross_validation.png')
    plt.close()
    print("\nCross-validation plot saved as 'lab14_cross_validation.png'")


In [None]:
def grid_search_tuning():
    """Tune a random forest on the iris data with an exhaustive grid search.

    The data is split 70/30; every combination in the grid is scored with
    5-fold cross-validated accuracy on the training portion, and the refit
    best estimator is then scored on the held-out portion.  The ten highest
    mean CV scores are drawn as a horizontal bar chart saved to
    ``lab14_grid_search.png``.
    """
    divider = "=" * 50
    print("\n" + divider)
    print("Grid Search Hyperparameter Tuning")
    print(divider)

    dataset = load_iris()
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, test_size=0.3, random_state=42
    )

    # 3 * 4 * 3 = 36 candidate configurations.
    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [3, 5, 7, None],
        'min_samples_split': [2, 5, 10]
    }

    print("\nParameter Grid:")
    for param in param_grid:
        print(f"  {param}: {param_grid[param]}")

    print("\nPerforming Grid Search...")
    grid_search = GridSearchCV(
        RandomForestClassifier(random_state=42),
        param_grid,
        cv=5,
        scoring='accuracy',
        n_jobs=-1,
    )
    grid_search.fit(X_train, y_train)

    print("\nBest Parameters:")
    for param, value in grid_search.best_params_.items():
        print(f"  {param}: {value}")

    print(f"\nBest Cross-Validation Score: {grid_search.best_score_:.4f}")

    # The held-out split was never seen during the search.
    test_score = grid_search.best_estimator_.score(X_test, y_test)
    print(f"Test Set Score: {test_score:.4f}")

    # Keep the ten configurations with the highest mean CV score.
    all_results = pd.DataFrame(grid_search.cv_results_)
    ranked = all_results.sort_values('mean_test_score', ascending=False)
    top_results = ranked.head(10)
    n_shown = len(top_results)

    fig, ax = plt.subplots(figsize=(12, 6))
    bar_positions = range(n_shown)
    ax.barh(bar_positions, top_results['mean_test_score'],
            color='lightblue', edgecolor='black')
    ax.set_yticks(bar_positions)
    ax.set_yticklabels([f"Config {rank}" for rank in range(1, n_shown + 1)])
    ax.set_xlabel('Mean Cross-Validation Score')
    ax.set_title('Top 10 Hyperparameter Configurations')
    ax.grid(True, alpha=0.3, axis='x')
    fig.tight_layout()
    fig.savefig('lab14_grid_search.png')
    plt.close(fig)
    print("\nGrid search results saved as 'lab14_grid_search.png'")


In [None]:
def random_search_tuning():
    """Demonstrate Random Search for hyperparameter tuning of an SVM.

    The breast-cancer data is split 70/30 and 20 randomly sampled
    configurations (out of the 40 in the grid) are scored with 3-fold
    cross-validated accuracy.  The best configuration is refit on the full
    training split and scored on the held-out split.

    The SVC is wrapped in a pipeline with a ``StandardScaler``.  SVM kernels
    are not scale invariant, and the raw breast-cancer features span several
    orders of magnitude; without scaling the RBF kernel fits poorly and the
    polynomial kernel can take an extremely long time to converge.  Putting
    the scaler inside the pipeline means each CV fold is standardised with
    statistics from its own training part only.
    """
    # Imported locally so this cell stays self-contained.
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    print("\n" + "=" * 50)
    print("Random Search Hyperparameter Tuning")
    print("=" * 50)

    # Load dataset
    cancer = load_breast_cancer()
    X = cancer.data
    y = cancer.target

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    # Define parameter distribution (names are plain SVC parameter names)
    param_dist = {
        'C': [0.1, 1, 10, 100],
        'gamma': ['scale', 'auto', 0.001, 0.01, 0.1],
        'kernel': ['rbf', 'poly']
    }

    print("\nParameter Distribution:")
    for param, values in param_dist.items():
        print(f"  {param}: {values}")

    # Create model: scaling + SVC as one estimator
    svc_step = 'svc'
    model = Pipeline([
        ('scaler', StandardScaler()),
        (svc_step, SVC(random_state=42)),
    ])
    # Pipeline parameters are addressed as "<step>__<param>".
    search_space = {
        f"{svc_step}__{param}": values for param, values in param_dist.items()
    }

    # Perform Random Search
    print("\nPerforming Random Search (20 iterations)...")
    random_search = RandomizedSearchCV(
        model, search_space, n_iter=20, cv=3,
        scoring='accuracy', random_state=42, n_jobs=-1
    )
    random_search.fit(X_train, y_train)

    print("\nBest Parameters:")
    for param, value in random_search.best_params_.items():
        # Strip the pipeline step prefix so the output shows SVC names.
        print(f"  {param.split('__', 1)[-1]}: {value}")

    print(f"\nBest Cross-Validation Score: {random_search.best_score_:.4f}")

    # Evaluate on test set; the pipeline applies the training-set scaling.
    best_model = random_search.best_estimator_
    test_score = best_model.score(X_test, y_test)
    print(f"Test Set Score: {test_score:.4f}")


In [None]:
def learning_curve_analysis():
    """Plot training and validation accuracy against training-set size.

    A decision tree is evaluated on the breast-cancer data with 5-fold
    cross-validation at ten training-set fractions from 10% to 100%.  The
    mean and standard deviation of the scores at the largest size are
    printed, and the curves (with +/- one standard deviation bands) are
    saved to ``lab14_learning_curve.png``.
    """
    line = "=" * 50
    print("\n" + line)
    print("Learning Curve Analysis")
    print(line)

    dataset = load_breast_cancer()
    tree = DecisionTreeClassifier(random_state=42)

    train_sizes, train_scores, val_scores = learning_curve(
        tree,
        dataset.data,
        dataset.target,
        cv=5,
        n_jobs=-1,
        train_sizes=np.linspace(0.1, 1.0, 10),
        scoring='accuracy',
    )

    # Each row holds the fold scores for one training-set size, so the
    # statistics are taken across axis 1.
    train_mean, train_std = train_scores.mean(axis=1), train_scores.std(axis=1)
    val_mean, val_std = val_scores.mean(axis=1), val_scores.std(axis=1)

    print("\nLearning Curve Summary:")
    print(f"Final Training Score: {train_mean[-1]:.4f} (+/- {train_std[-1]:.4f})")
    print(f"Final Validation Score: {val_mean[-1]:.4f} (+/- {val_std[-1]:.4f})")

    fig, ax = plt.subplots(figsize=(10, 6))
    curves = (
        ('Training Score', 'blue', train_mean, train_std),
        ('Validation Score', 'green', val_mean, val_std),
    )
    for label, colour, centre, spread in curves:
        ax.plot(train_sizes, centre, 'o-', color=colour, label=label)
        ax.fill_between(train_sizes, centre - spread, centre + spread,
                        alpha=0.1, color=colour)
    ax.set_xlabel('Training Set Size')
    ax.set_ylabel('Accuracy')
    ax.set_title('Learning Curve')
    ax.legend(loc='best')
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    fig.savefig('lab14_learning_curve.png')
    plt.close(fig)
    print("\nLearning curve saved as 'lab14_learning_curve.png'")


In [None]:
def compare_models():
    """Compare three classifiers on the iris data with 5-fold cross-validation.

    For a random forest, a decision tree and an SVM, the mean and standard
    deviation of the fold accuracies are printed.  A two-panel figure (bar
    chart of mean +/- std, and box plot of the fold scores) is saved to
    ``lab14_model_comparison.png``.

    Box-plot tick labels are set through the axes rather than the
    ``labels=`` keyword of ``boxplot``, which Matplotlib 3.9 deprecated in
    favour of ``tick_labels=``; setting them on the axes works on every
    version.  The y-axis floor defaults to 0.9 but is lowered when a fold
    score or error bar would otherwise be clipped.
    """
    print("\n" + "=" * 50)
    print("Model Comparison")
    print("=" * 50)

    # Load dataset
    iris = load_iris()
    X = iris.data
    y = iris.target

    # Define models
    models = {
        'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
        'Decision Tree': DecisionTreeClassifier(random_state=42),
        'SVM': SVC(random_state=42)
    }

    # Evaluate each model
    results = {}
    print("\nEvaluating models with 5-fold cross-validation:")

    for name, model in models.items():
        scores = cross_val_score(model, X, y, cv=5)
        results[name] = {
            'mean': scores.mean(),
            'std': scores.std(),
            'scores': scores
        }
        print(f"\n{name}:")
        print(f"  Mean Accuracy: {scores.mean():.4f}")
        print(f"  Std Dev: {scores.std():.4f}")

    model_names = list(results)
    means = [results[name]['mean'] for name in model_names]
    stds = [results[name]['std'] for name in model_names]
    all_scores = [results[name]['scores'] for name in model_names]

    # Shared y-range for both panels: keep the familiar 0.9 floor unless a
    # fold score or the bottom of an error bar would fall outside it.
    lowest_fold = min(fold_scores.min() for fold_scores in all_scores)
    lowest_bar = min(mean - std for mean, std in zip(means, stds))
    y_floor = min(0.9, min(lowest_fold, lowest_bar) - 0.02)
    y_limits = [y_floor, 1.0]

    # Visualize comparison
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

    # Bar plot
    ax1.bar(model_names, means, yerr=stds, capsize=5,
            color=['blue', 'green', 'orange'], alpha=0.7, edgecolor='black')
    ax1.set_ylabel('Accuracy')
    ax1.set_title('Model Comparison (Mean ± Std)')
    ax1.grid(True, alpha=0.3, axis='y')
    ax1.set_ylim(y_limits)

    # Box plot; boxes sit at positions 1..N by default.
    ax2.boxplot(all_scores)
    ax2.set_xticks(range(1, len(model_names) + 1))
    ax2.set_xticklabels(model_names)
    ax2.set_ylabel('Accuracy')
    ax2.set_title('Model Comparison (Distribution)')
    ax2.grid(True, alpha=0.3, axis='y')
    ax2.set_ylim(y_limits)

    plt.tight_layout()
    plt.savefig('lab14_model_comparison.png')
    plt.close()
    print("\nModel comparison plot saved as 'lab14_model_comparison.png'")


In [None]:
def bias_variance_tradeoff():
    """Show how train and test accuracy change with decision-tree depth.

    The breast-cancer data is split 70/30 and a decision tree is fitted for
    every ``max_depth`` from 1 to 20.  Train and test accuracy are printed
    for every fifth depth, and both curves are plotted and saved to
    ``lab14_bias_variance.png``.

    Two shaded bands mark depths 1-5 and 15-20 as illustrative high-bias
    and high-variance regions.  The legend is created after the bands are
    drawn, because ``legend()`` only lists artists that already exist.
    """
    print("\n" + "=" * 50)
    print("Bias-Variance Tradeoff")
    print("=" * 50)

    # Load dataset
    cancer = load_breast_cancer()
    X = cancer.data
    y = cancer.target

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    # Test different max_depth values
    max_depths = range(1, 21)
    train_scores = []
    test_scores = []

    print("\nTesting different model complexities:")
    for depth in max_depths:
        model = DecisionTreeClassifier(max_depth=depth, random_state=42)
        model.fit(X_train, y_train)

        train_score = model.score(X_train, y_train)
        test_score = model.score(X_test, y_test)

        train_scores.append(train_score)
        test_scores.append(test_score)

        # Report a sample of depths to keep the console output short.
        if depth % 5 == 0:
            print(f"Max Depth {depth}: Train={train_score:.4f}, Test={test_score:.4f}")

    # Plot
    plt.figure(figsize=(10, 6))
    plt.plot(max_depths, train_scores, 'o-', label='Training Score')
    plt.plot(max_depths, test_scores, 's-', label='Test Score')
    plt.xlabel('Model Complexity (Max Depth)')
    plt.ylabel('Accuracy')
    plt.title('Bias-Variance Tradeoff')
    plt.grid(True, alpha=0.3)

    # Mark regions (fixed, illustrative depth ranges)
    plt.axvspan(1, 5, alpha=0.1, color='red', label='High Bias (Underfitting)')
    plt.axvspan(15, 20, alpha=0.1, color='blue', label='High Variance (Overfitting)')

    # Build the legend last so the shaded regions are included in it.
    plt.legend()

    plt.tight_layout()
    plt.savefig('lab14_bias_variance.png')
    plt.close()
    print("\nBias-variance plot saved as 'lab14_bias_variance.png'")


In [None]:
def main():
    """Run every Lab 14 demonstration in order.

    The classification-metrics demo runs first and hands its held-out
    labels and predicted scores to the ROC plot; the remaining demos take
    no arguments and are executed one after another.
    """
    rule = "=" * 50
    print("\n" + rule)
    print("Lab 14: Model Evaluation and Tuning")
    print(rule)

    # The ROC plot reuses the outputs of the metrics demo.
    y_test, y_pred_proba = evaluate_classification_metrics()
    plot_roc_curves(y_test, y_pred_proba)

    # Independent demos, in presentation order.
    standalone_demos = (
        cross_validation_evaluation,
        grid_search_tuning,
        random_search_tuning,
        learning_curve_analysis,
        compare_models,
        bias_variance_tradeoff,
    )
    for run_demo in standalone_demos:
        run_demo()

    print("\n" + rule)
    print("Lab 14 Complete!")
    print(rule)


In [None]:
# Run the full lab only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
