# Phase 5: Model Evaluation and Visualization

This notebook provides comprehensive evaluation and visualization of all trained models:
1. Detailed performance metrics
2. Confusion matrices
3. ROC curves comparison
4. Feature importance analysis
5. Advanced visualizations

## Models Evaluated
- Naive Bayes
- Decision Tree


In [None]:
# Install the plotting and metrics packages used by this notebook
# (-q keeps pip's output quiet).
# NOTE(review): pyspark is imported in the next cell but is not installed
# here — confirm the runtime already provides it.
%pip install matplotlib seaborn plotly scikit-learn -q


In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.metrics import confusion_matrix, roc_curve, auc, roc_auc_score
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
import warnings
warnings.filterwarnings('ignore')

# Plot defaults for the rest of the notebook: seaborn's "whitegrid" theme
# and a 12x6 default matplotlib figure size.
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

# Confirmation that the setup cell ran to the end.
print("✓ Libraries imported")


## Step 1: Load Model Results

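**Note**: This notebook depends on the ML models notebook having been run first. It reads the metrics table from `/content/model_metrics.csv` and expects the variables `nb_predictions`, `dt_predictions`, and `dt_model` to exist in the current session. Cells whose inputs are missing print a warning instead of producing output.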


In [None]:
# Load the metrics table from CSV (first column becomes the index).
# metrics_df is set to None when the file is missing or unreadable; the
# metric-based cells below check for None before using it.
try:
    metrics_df = pd.read_csv('/content/model_metrics.csv', index_col=0)
except FileNotFoundError:
    metrics_df = None
    print("⚠ Metrics CSV not found. Make sure to run previous notebook first.")
    print("Metric-based cells below will be skipped.")
except (OSError, ValueError) as exc:
    # The file exists but could not be read or parsed (pandas' parser
    # errors are ValueError subclasses). Report the real reason instead of
    # claiming the file is missing.
    metrics_df = None
    print(f"⚠ Metrics CSV could not be read: {exc}")
    print("Metric-based cells below will be skipped.")
else:
    # Kept outside the try block so a display problem is never mistaken
    # for a loading failure.
    print("✓ Metrics loaded from CSV")
    display(metrics_df)


## Step 2: Model Performance Comparison Table


In [None]:
# Comparison table of every metric, followed by the top model per metric.
if metrics_df is None:
    print("Please run the ML models notebook first to generate metrics.")
else:
    # Display copy with friendlier headers: strip the 'weighted' prefix and
    # title-case the remainder (e.g. 'weightedRecall' -> 'Recall').
    display_df = metrics_df.copy()
    pretty_names = []
    for column_name in display_df.columns:
        pretty_names.append(column_name.replace('weighted', '').title())
    display_df.columns = pretty_names

    print("=== Model Performance Comparison ===")
    display(display_df.round(4))

    # For each metric column, report the index label holding the highest
    # value together with that value.
    print("\n=== Best Model by Metric ===")
    for metric in metrics_df.columns:
        scores = metrics_df[metric]
        best_model, best_value = scores.idxmax(), scores.max()
        print(f"{metric}: {best_model} ({best_value:.4f})")


## Step 3: Visualize Model Comparison


In [None]:
# Bar charts comparing all models, one panel per metric.
def _plot_metric_bars(ax, scores, metric):
    """Draw one bar per row of ``scores`` for a single metric.

    Parameters:
        ax: matplotlib Axes to draw on.
        scores: pandas Series of metric values; its index supplies the
            x tick labels.
        metric: metric column name, used to build the panel title (the
            'weighted' prefix is stripped and the rest title-cased).
    """
    positions = range(len(scores))
    bars = ax.bar(positions, scores, color=['#e74c3c', '#2ecc71'], alpha=0.7)
    ax.set_xticks(positions)
    ax.set_xticklabels(scores.index, rotation=45, ha='right')
    ax.set_title(f'{metric.replace("weighted", "").title()} Comparison', fontweight='bold')
    ax.set_ylabel('Score')
    ax.set_ylim([0, 1])
    ax.grid(True, alpha=0.3, axis='y')

    # Write each value just above its bar.
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height,
                f'{height:.3f}', ha='center', va='bottom', fontsize=9)


if metrics_df is not None:
    metrics_to_plot = ['accuracy', 'auc', 'weightedPrecision', 'weightedRecall', 'f1']

    # Two figures: the first two metrics side by side, then the remaining ones.
    figure_layouts = (
        (metrics_to_plot[:2], (14, 5)),
        (metrics_to_plot[2:], (18, 5)),
    )
    for metric_group, figsize in figure_layouts:
        # squeeze=False keeps `axes` a 2-D array whatever the panel count.
        fig, axes = plt.subplots(1, len(metric_group), figsize=figsize, squeeze=False)
        for ax, metric in zip(axes.flatten(), metric_group):
            if metric in metrics_df.columns:
                _plot_metric_bars(ax, metrics_df[metric], metric)
            else:
                # Hide the panel instead of leaving an empty frame when the
                # metrics table has no such column.
                ax.set_visible(False)
        plt.tight_layout()
        plt.show()
else:
    print("Metrics not available for visualization")


## Step 4: Confusion Matrices


In [None]:
# Function to extract predictions and labels from Spark DataFrame
def get_predictions_labels(spark_predictions):
    """Return ``(labels, predictions)`` taken from a predictions DataFrame.

    Selects the "label" and "prediction" columns, converts the selection
    with ``toPandas()``, and returns the ``.values`` of each column in
    that order.
    """
    label_and_pred = spark_predictions.select("label", "prediction").toPandas()
    y_true = label_and_pred['label'].values
    y_pred = label_and_pred['prediction'].values
    return y_true, y_pred

# Create confusion matrices for all models.
# Only the lookup of the prediction DataFrames is guarded: a NameError raised
# by anything else in this cell (for example a missing import) should surface
# rather than be reported as "predictions not available".
try:
    predictions_list = [
        ("Naive Bayes", nb_predictions),
        ("Decision Tree", dt_predictions)
    ]
except NameError:
    predictions_list = []
    print("⚠ Predictions not available. Please run the ML models notebook first.")

if predictions_list:
    # One heatmap panel per model; squeeze=False keeps `axes` 2-D for any
    # number of models.
    n_models = len(predictions_list)
    fig, axes = plt.subplots(1, n_models, figsize=(7 * n_models, 6), squeeze=False)

    for ax, (model_name, pred_df) in zip(axes.flatten(), predictions_list):
        y_true, y_pred = get_predictions_labels(pred_df)
        cm = confusion_matrix(y_true, y_pred)

        # NOTE(review): the tick labels assume the first class is
        # "not cancelled" and the second is "cancelled" — confirm against
        # the label encoding used in training.
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax,
                    xticklabels=['Not Cancelled', 'Cancelled'],
                    yticklabels=['Not Cancelled', 'Cancelled'])
        ax.set_title(f'{model_name} - Confusion Matrix', fontweight='bold')
        ax.set_ylabel('True Label')
        ax.set_xlabel('Predicted Label')

    plt.tight_layout()
    plt.show()


## Step 5: ROC Curves Comparison


In [None]:
# Function to extract probabilities from Spark predictions
def get_probabilities(spark_predictions):
    """Return ``(y_true, y_proba)`` taken from a predictions DataFrame.

    ``y_true`` is the ``.values`` of the "label" column. ``y_proba`` holds
    element 1 of every entry in the "probability" column, which the
    notebook treats as the probability of the positive class (class 1).
    """
    def _positive_class_probability(prob_vector):
        # Index 1 of the per-row probability vector.
        return prob_vector[1]

    collected = spark_predictions.select("label", "probability").toPandas()
    y_proba = collected['probability'].apply(_positive_class_probability).values
    y_true = collected['label'].values
    return y_true, y_proba

# Plot ROC curves for all models.
# The prediction DataFrames are looked up before any figure is created, so a
# missing variable does not leave an empty figure behind, and only that lookup
# is guarded so unrelated NameErrors are not misreported.
try:
    models_data = [
        ("Naive Bayes", nb_predictions, '#e74c3c'),
        ("Decision Tree", dt_predictions, '#2ecc71')
    ]
except NameError:
    models_data = []
    print("⚠ Predictions not available. Please run the ML models notebook first.")

if models_data:
    fig, ax = plt.subplots(figsize=(10, 8))

    # One curve per model, labelled with the area under that curve.
    for model_name, pred_df, color in models_data:
        y_true, y_proba = get_probabilities(pred_df)
        fpr, tpr, _ = roc_curve(y_true, y_proba)
        roc_auc = auc(fpr, tpr)

        ax.plot(fpr, tpr, color=color, lw=2,
                label=f'{model_name} (AUC = {roc_auc:.4f})')

    # Diagonal reference line for a random classifier.
    ax.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--', label='Random Classifier')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate', fontsize=12)
    ax.set_ylabel('True Positive Rate', fontsize=12)
    ax.set_title('ROC Curves Comparison', fontsize=14, fontweight='bold')
    ax.legend(loc="lower right", fontsize=10)
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    plt.show()


## Step 6: Interactive Visualization with Plotly


In [None]:
# Interactive grouped bar chart of every metric for every model.
if metrics_df is not None:
    # Reshape to long form: one row per (model, metric) pair.
    # rename_axis('model') guarantees that reset_index() yields a 'model'
    # column even when the loaded CSV left the index unnamed; without it
    # melt(id_vars='model') raises KeyError.
    metrics_long = (
        metrics_df
        .rename_axis('model')
        .reset_index()
        .melt(id_vars='model', var_name='metric', value_name='score')
    )

    # Create interactive bar chart
    fig = px.bar(metrics_long, x='model', y='score', color='metric',
                 barmode='group', title='Model Performance Metrics Comparison',
                 labels={'score': 'Score', 'model': 'Model', 'metric': 'Metric'})
    fig.update_layout(height=600, showlegend=True)
    fig.show()
else:
    print("Metrics not available for interactive visualization")


## Step 7: Feature Importance Analysis


In [None]:
# Extract and visualize feature importance from the decision tree model.
# Only the model lookup is guarded, so a NameError from anything else in this
# cell is not misreported as "models not available".
try:
    dt_importance = dt_model.featureImportances.toArray()
except NameError:
    dt_importance = None
    print("⚠ Models not available. Please run the ML models notebook first.")

if dt_importance is not None:
    # Show at most 15 features; fewer when the model has fewer, so the title
    # always matches the number of bars drawn.
    top_n = min(15, len(dt_importance))
    # argsort is ascending: take the last top_n indices and reverse them to
    # get the most important feature first.
    top_dt_indices = np.argsort(dt_importance)[-top_n:][::-1]

    fig, ax = plt.subplots(1, 1, figsize=(10, 8))
    ax.barh(range(len(top_dt_indices)), dt_importance[top_dt_indices], color='coral', alpha=0.7)
    ax.set_yticks(range(len(top_dt_indices)))
    # Features are labelled by their position in the importance array; no
    # feature names are available in this cell.
    ax.set_yticklabels([f'Feature {idx}' for idx in top_dt_indices])
    ax.set_title(f'Decision Tree - Top {top_n} Feature Importance', fontweight='bold', fontsize=14)
    ax.set_xlabel('Importance', fontsize=12)
    # Put the most important feature at the top of the chart.
    ax.invert_yaxis()
    ax.grid(True, alpha=0.3, axis='x')

    plt.tight_layout()
    plt.show()


## Step 8: Performance Summary Dashboard


In [None]:
# Text summary: per-metric rankings plus the model with the best average rank.
if metrics_df is not None:
    print("=" * 70)
    print("MODEL PERFORMANCE SUMMARY")
    print("=" * 70)

    # (heading, metrics_df column) for each numbered ranking section.
    ranking_sections = [
        ("ACCURACY RANKING", 'accuracy'),
        ("AUC-ROC RANKING", 'auc'),
        ("F1-SCORE RANKING", 'f1'),
    ]
    for section_no, (heading, metric) in enumerate(ranking_sections, 1):
        print(f"\n{section_no}. {heading}:")
        if metric not in metrics_df.columns:
            # Same tolerance as the bar-chart cell, which skips metrics
            # missing from the table instead of raising KeyError.
            print(f"   ({metric} not available)")
            continue
        ranked = metrics_df[metric].sort_values(ascending=False)
        for i, (model, score) in enumerate(ranked.items(), 1):
            print(f"   {i}. {model}: {score:.4f}")

    print("\n4. BEST OVERALL MODEL:")
    # Rank every available metric (rank 1 = highest score) and average the
    # ranks per model; the lowest average rank wins.
    rank_metrics = [
        metric
        for metric in ('accuracy', 'auc', 'f1', 'weightedPrecision', 'weightedRecall')
        if metric in metrics_df.columns
    ]
    if rank_metrics:
        ranks = metrics_df[rank_metrics].rank(ascending=False)
        avg_rank = ranks.mean(axis=1)
        best_model = avg_rank.idxmin()
        print(f"   {best_model} (Average Rank: {avg_rank[best_model]:.2f})")
    else:
        print("   (no ranking metrics available)")

    print("\n" + "=" * 70)
else:
    print("Metrics not available for summary")


## Step 9: Save Visualizations

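Save the final metrics table for the report. Figures are not written to disk automatically; the cell below prints the options for saving them manually.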


In [None]:
# Make sure the report output folder (including its figures/ subfolder) exists.
import os
os.makedirs('/content/reports/figures', exist_ok=True)

# Write the metrics table into the reports folder when one was loaded.
if metrics_df is not None:
    metrics_df.to_csv('/content/reports/model_metrics_final.csv')
    print("✓ Metrics saved to reports/model_metrics_final.csv")

# Closing status line followed by the manual options for exporting figures.
closing_lines = (
    "\n✓ All visualizations completed",
    "\nVisualizations can be saved by:",
    "  1. Right-clicking on plots and selecting 'Save image'",
    "  2. Using plt.savefig() in code cells",
    "  3. Downloading from Colab file browser",
)
print("\n".join(closing_lines))


## Summary

- ✓ Model performance comparison completed
- ✓ Confusion matrices generated
- ✓ ROC curves compared
- ✓ Feature importance analyzed
- ✓ Interactive visualizations created
- ✓ Performance summary dashboard generated

**Project Complete!** All components have been implemented:
- ✓ Data ingestion into MongoDB
- ✓ Exploratory Data Analysis
- ✓ Spark data processing
- ✓ ML models trained (Naive Bayes, Decision Tree)
- ✓ Comprehensive evaluation and visualization

**Next Steps**: 
- Review all results
- Prepare final report with screenshots
- Document team contributions
- Upload to GitHub
