In [None]:
# Import all necessary libraries
import kagglehub
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import os
warnings.filterwarnings('ignore')

# Machine Learning Libraries
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix, 
    roc_auc_score, roc_curve, precision_recall_curve, f1_score
)
from sklearn.feature_selection import SelectKBest, f_classif, RFE
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline
import xgboost as xgb

# Set random seed for reproducibility
# This seeds NumPy's legacy global RNG only; the samplers and estimators built in the
# visible cells of this notebook additionally pass their own explicit random_state.
np.random.seed(42)

# Confirmation that the import cell ran to completion
print("Libraries imported successfully!")


In [None]:
# Download and load the dataset
# The value returned by dataset_download is joined with the CSV file name below,
# i.e. it is used as the directory that holds the downloaded files.
path = kagglehub.dataset_download("blastchar/telco-customer-churn")
file_path = os.path.join(path, 'WA_Fn-UseC_-Telco-Customer-Churn.csv')
df = pd.read_csv(file_path)

print(f"Dataset shape: {df.shape}")
print(f"\nFirst 5 rows:")
# Bare last expression so the notebook renders the preview as the cell output
df.head()


In [None]:
# Improved Data Preprocessing
def preprocess_data(df):
    """Clean the raw Telco churn frame and append engineered features.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data. Must contain ``customerID``, ``TotalCharges``, ``tenure``,
        ``MonthlyCharges`` and the nine service columns listed in
        ``service_cols`` below. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame without ``customerID``, with ``TotalCharges`` converted to
        numeric (unparseable entries replaced by the column median), and six
        columns appended in this order: ``AvgChargesPerMonth``,
        ``TenureGroup``, ``ChargesGroup``, ``TotalServices``,
        ``HasStreaming``, ``HasProtection``.

    Notes
    -----
    ``tenure`` values above 72 and ``MonthlyCharges`` above 120 fall outside
    the fixed bins and yield NaN in the corresponding group column.
    """
    # drop() returns a new frame, so the caller's data is left untouched
    df_processed = df.drop(columns='customerID')

    # TotalCharges arrives as text; anything that is not a number becomes NaN.
    # The result is assigned back explicitly: `fillna(..., inplace=True)` on a
    # column selection is a chained assignment and is a silent no-op under
    # pandas Copy-on-Write.
    total_charges = pd.to_numeric(df_processed['TotalCharges'], errors='coerce')
    df_processed['TotalCharges'] = total_charges.fillna(total_charges.median())

    # Feature Engineering - Create new meaningful features
    # 1. Average charges per tenure month (+1 avoids division by zero at tenure 0)
    df_processed['AvgChargesPerMonth'] = df_processed['TotalCharges'] / (df_processed['tenure'] + 1)

    # 2. Tenure groups. include_lowest=True keeps tenure == 0 in the first bin;
    #    without it the left edge is open and brand-new customers become NaN.
    df_processed['TenureGroup'] = pd.cut(
        df_processed['tenure'],
        bins=[0, 12, 24, 48, 72],
        labels=['0-1Year', '1-2Years', '2-4Years', '4+Years'],
        include_lowest=True,
    )

    # 3. Monthly charges groups (same treatment of the lower edge)
    df_processed['ChargesGroup'] = pd.cut(
        df_processed['MonthlyCharges'],
        bins=[0, 35, 65, 95, 120],
        labels=['Low', 'Medium', 'High', 'VeryHigh'],
        include_lowest=True,
    )

    # 4. Total services count
    service_cols = ['PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
                    'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies']
    has_service = df_processed[service_cols] == 'Yes'
    # InternetService stores the connection type rather than Yes/No, so an
    # `== 'Yes'` test never counts it; any value other than 'No' is a subscription.
    # NOTE(review): confirm the level names with df['InternetService'].unique().
    has_service['InternetService'] = df_processed['InternetService'] != 'No'
    df_processed['TotalServices'] = has_service.sum(axis=1).astype(int)

    # 5. Has any streaming service
    df_processed['HasStreaming'] = (
        has_service[['StreamingTV', 'StreamingMovies']].any(axis=1).astype(int)
    )

    # 6. Has any protection service
    df_processed['HasProtection'] = (
        has_service[['OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport']]
        .any(axis=1)
        .astype(int)
    )

    return df_processed

# Apply preprocessing
df_processed = preprocess_data(df)
print(f"After preprocessing: {df_processed.shape}")
# preprocess_data appends its six engineered columns at the end, hence the [-6:] slice
print(f"\nNew features created: {df_processed.columns.tolist()[-6:]}")


In [None]:
# Advanced Feature Encoding
def encode_features(df):
    """Split a preprocessed frame into an encoded feature matrix and a 0/1 target.

    ``Churn`` ('No'/'Yes') is mapped to 0/1 and returned as ``y``. Among the
    remaining columns, the two-valued ones that are present are recoded to
    0/1, the multi-valued categorical ones are one-hot encoded with the first
    level of each dropped, and everything else (e.g. SeniorCitizen and the
    numeric features) is passed through unchanged. The input frame is not
    modified.

    Returns
    -------
    tuple
        ``(X_encoded, y)``
    """
    target = df['Churn'].map({'No': 0, 'Yes': 1})
    features = df.drop(columns='Churn')

    # A single lookup serves the Yes/No columns as well as gender
    to_binary = {'No': 0, 'Yes': 1, 'Male': 1, 'Female': 0}
    two_level = ('gender', 'Partner', 'Dependents', 'PhoneService', 'PaperlessBilling')
    # assign() overwrites existing columns in place, so column order is preserved;
    # columns missing from the frame are skipped
    features = features.assign(**{
        name: features[name].map(to_binary)
        for name in two_level
        if name in features.columns
    })

    one_hot = [
        'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup',
        'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
        'Contract', 'PaymentMethod', 'TenureGroup', 'ChargesGroup',
    ]
    encoded = pd.get_dummies(features, columns=one_hot, drop_first=True)

    return encoded, target

# Encode the preprocessed frame; X and y are the globals that the split cell below consumes
X, y = encode_features(df_processed)
print(f"Features shape: {X.shape}")
print(f"Target distribution:\n{y.value_counts()}")
# y is 0/1 (see encode_features), so its mean is the share of churners
print(f"Churn rate: {y.mean():.3f}")


In [None]:
# Train-Test Split with Stratification
# 20% of the rows are held out; stratify=y splits by class label and
# random_state=42 fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"Training set: {X_train.shape}, {y_train.shape}")
print(f"Test set: {X_test.shape}, {y_test.shape}")
# Printing both churn rates lets the reader check that stratification kept them close
print(f"\nTraining set churn rate: {y_train.mean():.3f}")
print(f"Test set churn rate: {y_test.mean():.3f}")


In [None]:
# Model Training with Hyperparameter Tuning
def train_improved_models(X_train, y_train):
    """Grid-search four SMOTE-based pipelines and return the refitted winners.

    Each candidate is an imblearn pipeline that oversamples with SMOTE and
    then fits a classifier; the logistic-regression and XGBoost pipelines put
    a StandardScaler between the two. Every grid is searched with 3-fold
    cross-validation scored by ROC-AUC.

    Returns
    -------
    dict
        Display name -> best pipeline found by the search, in training order
        (Logistic Regression, Random Forest, XGBoost, Gradient Boosting).
    """

    def build_pipeline(estimator, scale):
        # SMOTE is always the first step; the scaler is optional
        steps = [('smote', SMOTE(random_state=42))]
        if scale:
            steps.append(('scaler', StandardScaler()))
        steps.append(('classifier', estimator))
        return ImbPipeline(steps)

    # (display name, tag used in the log line, pipeline, hyperparameter grid)
    candidates = [
        (
            'Logistic Regression', 'LR',
            build_pipeline(LogisticRegression(random_state=42, max_iter=1000), scale=True),
            {
                'classifier__C': [0.1, 1, 10],
                'classifier__penalty': ['l1', 'l2'],
                'classifier__solver': ['liblinear'],
            },
        ),
        (
            'Random Forest', 'RF',
            build_pipeline(RandomForestClassifier(random_state=42), scale=False),
            {
                'classifier__n_estimators': [100, 200],
                'classifier__max_depth': [10, 20],
                'classifier__min_samples_split': [2, 5],
            },
        ),
        (
            'XGBoost', 'XGB',
            build_pipeline(xgb.XGBClassifier(random_state=42, eval_metric='logloss'), scale=True),
            {
                'classifier__n_estimators': [100, 200],
                'classifier__max_depth': [3, 6],
                'classifier__learning_rate': [0.1, 0.2],
            },
        ),
        (
            'Gradient Boosting', 'GB',
            build_pipeline(GradientBoostingClassifier(random_state=42), scale=False),
            {
                'classifier__n_estimators': [100, 200],
                'classifier__max_depth': [3, 5],
                'classifier__learning_rate': [0.1, 0.2],
            },
        ),
    ]

    models = {}
    for title, tag, pipeline, grid in candidates:
        print(f"Training {title}...")
        search = GridSearchCV(pipeline, grid, cv=3, scoring='roc_auc', n_jobs=-1)
        search.fit(X_train, y_train)
        models[title] = search.best_estimator_
        print(f"Best {tag} params: {search.best_params_}")

    return models

# Train all models
# `models` maps a display name to the best pipeline returned by each grid search
print("Starting model training with hyperparameter tuning...")
models = train_improved_models(X_train, y_train)
print("\nAll models trained successfully!")


In [None]:
# Model Evaluation
def evaluate_models(models, X_test, y_test):
    """Score every model on the held-out data and plot ROC curves plus a comparison.

    Parameters
    ----------
    models : dict
        Display name -> fitted estimator exposing ``predict`` and ``predict_proba``.
    X_test, y_test
        Evaluation features and 0/1 labels.

    Returns
    -------
    dict
        Display name -> dict with keys ``'Accuracy'``, ``'F1-Score'``,
        ``'ROC-AUC'``, ``'Predictions'`` and ``'Probabilities'``. Empty when
        ``models`` is empty (no figure is drawn in that case).

    The figure holds one ROC panel per model followed by a bar chart of the
    three scalar metrics in the last grid cell. The grid is three columns wide
    and grows by rows, so any number of models fits; with up to five models
    the layout is the 2x3 grid with the comparison in slot 6.
    """
    results = {}
    if not models:
        return results

    metric_names = ['Accuracy', 'F1-Score', 'ROC-AUC']
    n_cols = 3
    # one panel per model plus the comparison chart, rounded up to whole rows
    n_rows = (len(models) + 1 + n_cols - 1) // n_cols
    fig = plt.figure(figsize=(15, 5 * n_rows))

    for slot, (name, model) in enumerate(models.items(), start=1):
        # Predictions
        y_pred = model.predict(X_test)
        y_pred_proba = model.predict_proba(X_test)[:, 1]

        # Metrics
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        roc_auc = roc_auc_score(y_test, y_pred_proba)

        results[name] = {
            'Accuracy': accuracy,
            'F1-Score': f1,
            'ROC-AUC': roc_auc,
            'Predictions': y_pred,
            'Probabilities': y_pred_proba
        }

        # ROC Curve
        ax = fig.add_subplot(n_rows, n_cols, slot)
        fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
        ax.plot(fpr, tpr, label=f'{name} (AUC = {roc_auc:.3f})')
        ax.plot([0, 1], [0, 1], 'k--')  # chance diagonal
        ax.set_xlabel('False Positive Rate')
        ax.set_ylabel('True Positive Rate')
        ax.set_title(f'ROC Curve - {name}')
        ax.legend()
        ax.grid(True)

    # Comparison plot in the last cell of the grid. The frame is built from the
    # scalar metrics only, so the prediction arrays never enter it.
    ax = fig.add_subplot(n_rows, n_cols, n_rows * n_cols)
    metrics_df = pd.DataFrame(
        [[scores[metric] for metric in metric_names] for scores in results.values()],
        index=list(results),
        columns=metric_names,
    )
    metrics_df.plot(kind='bar', ax=ax, rot=45)
    ax.set_title('Model Comparison')
    ax.set_ylabel('Score')
    ax.legend()
    ax.grid(True)

    fig.tight_layout()
    plt.show()

    return results

# Evaluate all models
results = evaluate_models(models, X_test, y_test)

# Print results
print("\n" + "="*60)
print("MODEL PERFORMANCE COMPARISON")
print("="*60)

# One text block per model with the three scalar metrics; the prediction and
# probability arrays also stored in `results` are not printed.
for name, metrics in results.items():
    print(f"\n{name}:")
    print(f"  Accuracy:  {metrics['Accuracy']:.4f}")
    print(f"  F1-Score:  {metrics['F1-Score']:.4f}")
    print(f"  ROC-AUC:   {metrics['ROC-AUC']:.4f}")


In [None]:
# Final Analysis and Best Model Selection
def final_analysis(results, models, X_test, y_test):
    """Select the model with the highest ROC-AUC and report on it in detail.

    Parameters
    ----------
    results : dict
        Output of ``evaluate_models``: name -> metrics dict containing at
        least ``'Accuracy'``, ``'F1-Score'``, ``'ROC-AUC'`` and ``'Predictions'``.
    models : dict
        Name -> fitted model, keyed like ``results``.
    X_test
        Evaluation features; its column labels (when present) name the bars of
        the feature-importance chart.
    y_test
        True 0/1 labels matching the stored predictions.

    Returns
    -------
    tuple
        ``(best_model_name, best_model, best_metrics)``

    Raises
    ------
    ValueError
        If ``results`` is empty.

    Prints the metrics and a classification report, then shows a confusion
    matrix next to the top-10 feature importances (or a placeholder when the
    winning classifier exposes none).
    """
    if not results:
        raise ValueError("final_analysis() needs at least one evaluated model")

    # Find best model
    best_model_name = max(results.keys(), key=lambda x: results[x]['ROC-AUC'])
    best_model = models[best_model_name]
    best_metrics = results[best_model_name]

    print(f"\n" + "="*60)
    print(f"BEST MODEL: {best_model_name}")
    print("="*60)
    print(f"Accuracy:  {best_metrics['Accuracy']:.4f}")
    print(f"F1-Score:  {best_metrics['F1-Score']:.4f}")
    print(f"ROC-AUC:   {best_metrics['ROC-AUC']:.4f}")

    # Detailed classification report
    print(f"\nDetailed Classification Report:")
    print(classification_report(y_test, best_metrics['Predictions'],
                                target_names=['No Churn', 'Churn']))

    # Confusion Matrix and Feature Importance
    fig, axes = plt.subplots(1, 2, figsize=(15, 6))

    # Confusion Matrix
    cm = confusion_matrix(y_test, best_metrics['Predictions'])
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['No Churn', 'Churn'],
                yticklabels=['No Churn', 'Churn'], ax=axes[0])
    axes[0].set_title(f'Confusion Matrix - {best_model_name}')
    axes[0].set_ylabel('True Label')
    axes[0].set_xlabel('Predicted Label')

    # The models trained in this notebook are pipelines whose last step is named
    # 'classifier'; a bare estimator is inspected directly instead.
    steps = getattr(best_model, 'named_steps', None)
    if steps is not None and 'classifier' in steps:
        classifier = steps['classifier']
    else:
        classifier = best_model

    # Feature Importance (if available)
    if hasattr(classifier, 'feature_importances_'):
        importances = np.asarray(classifier.feature_importances_)

        # Take the names from the X_test argument rather than a notebook global,
        # and fall back to generic labels if they cannot be matched one-to-one.
        columns = getattr(X_test, 'columns', None)
        if columns is not None and len(columns) == len(importances):
            feature_names = list(columns)
        else:
            feature_names = [f'feature_{i}' for i in range(len(importances))]

        # Sort features by importance
        indices = np.argsort(importances)[::-1][:10]  # Top 10 features

        axes[1].barh(range(len(indices)), importances[indices])
        axes[1].set_yticks(range(len(indices)))
        axes[1].set_yticklabels([feature_names[i] for i in indices])
        axes[1].set_title('Top 10 Feature Importances')
        axes[1].set_xlabel('Importance')
    else:
        # Real newlines here: the doubled backslash rendered a literal "\n" in the figure
        axes[1].text(0.5, 0.5, 'Feature importance\nnot available\nfor this model',
                     ha='center', va='center', transform=axes[1].transAxes)
        axes[1].set_title('Feature Importance')

    plt.tight_layout()
    plt.show()

    return best_model_name, best_model, best_metrics

# Keep the winner's name, fitted model and metrics dict as notebook globals
best_name, best_model, best_metrics = final_analysis(results, models, X_test, y_test)


In [None]:
# Advanced Ensemble Methods with Focus on Random Forest
def create_advanced_ensemble(X_train, y_train):
    """Fit five Random-Forest-centred ensembles and return them by name.

    Built models, in order:

    * ``'Bagging RF'``  - SMOTE + bagging of 10 random forests
    * ``'Voting RF'``   - SMOTE + soft vote over three differently sized forests
    * ``'Stacking RF'`` - SMOTE + stack of RF / XGBoost / gradient boosting with
      a random-forest meta-learner (3-fold internal CV)
    * ``'AdaBoost RF'`` - SMOTE + AdaBoost over shallow random forests
    * ``'Balanced RF'`` - imblearn's BalancedRandomForestClassifier without SMOTE

    Parameters
    ----------
    X_train, y_train
        Training features and 0/1 labels.

    Returns
    -------
    dict
        Display name -> fitted imblearn pipeline whose final step is named
        ``'classifier'``.
    """
    import inspect
    from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, StackingClassifier
    from imblearn.ensemble import BalancedRandomForestClassifier

    def wrap(ensemble_cls, base, **kwargs):
        # scikit-learn 1.2 renamed `base_estimator` to `estimator` and 1.4 removed
        # the old keyword, so choose whichever name the installed version accepts.
        accepted = inspect.signature(ensemble_cls).parameters
        keyword = 'estimator' if 'estimator' in accepted else 'base_estimator'
        return ensemble_cls(**{keyword: base}, **kwargs)

    def with_smote(classifier):
        # Oversample the training data, then fit the ensemble
        return ImbPipeline([
            ('smote', SMOTE(random_state=42)),
            ('classifier', classifier)
        ])

    ensemble_models = {}

    # 1. Bagging Ensemble with Multiple Random Forests
    print("Creating Bagging Ensemble with Random Forest...")

    rf_base = RandomForestClassifier(
        n_estimators=200,
        max_depth=15,
        min_samples_split=3,
        min_samples_leaf=1,
        class_weight='balanced',  # Handle class imbalance
        random_state=42
    )

    bagging_rf = with_smote(wrap(
        BaggingClassifier, rf_base,
        n_estimators=10,
        random_state=42,
        n_jobs=-1
    ))
    bagging_rf.fit(X_train, y_train)
    ensemble_models['Bagging RF'] = bagging_rf

    # 2. Voting Classifier with Multiple Random Forests
    print("Creating Voting Classifier with Random Forest variants...")

    # Different RF configurations (size, depth and seed all vary)
    rf1 = RandomForestClassifier(n_estimators=100, max_depth=10, class_weight='balanced', random_state=42)
    rf2 = RandomForestClassifier(n_estimators=200, max_depth=15, class_weight='balanced', random_state=43)
    rf3 = RandomForestClassifier(n_estimators=150, max_depth=20, class_weight='balanced', random_state=44)

    voting_rf = with_smote(VotingClassifier(
        estimators=[
            ('rf1', rf1),
            ('rf2', rf2),
            ('rf3', rf3)
        ],
        voting='soft'
    ))
    voting_rf.fit(X_train, y_train)
    ensemble_models['Voting RF'] = voting_rf

    # 3. Stacking Ensemble with Random Forest as Meta-learner
    print("Creating Stacking Ensemble with Random Forest...")

    # Base models. XGBClassifier has no `class_weight` parameter (it was only
    # forwarded as an unused keyword), so it is not passed; the SMOTE step of
    # this pipeline already oversamples the training data.
    base_models = [
        ('rf', RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42)),
        ('xgb', xgb.XGBClassifier(n_estimators=100, random_state=42, eval_metric='logloss')),
        ('gb', GradientBoostingClassifier(n_estimators=100, random_state=42))
    ]

    # Meta-learner (Random Forest)
    meta_learner = RandomForestClassifier(n_estimators=50, class_weight='balanced', random_state=42)

    stacking_rf = with_smote(StackingClassifier(
        estimators=base_models,
        final_estimator=meta_learner,
        cv=3
    ))
    stacking_rf.fit(X_train, y_train)
    ensemble_models['Stacking RF'] = stacking_rf

    # 4. AdaBoost with Random Forest
    print("Creating AdaBoost with Random Forest...")

    ada_rf = with_smote(wrap(
        AdaBoostClassifier,
        RandomForestClassifier(n_estimators=50, max_depth=5, class_weight='balanced', random_state=42),
        n_estimators=50,
        learning_rate=0.1,
        random_state=42
    ))
    ada_rf.fit(X_train, y_train)
    ensemble_models['AdaBoost RF'] = ada_rf

    # 5. Balanced Random Forest with Cost-Sensitive Learning
    print("Creating Balanced Random Forest...")

    # No SMOTE step here; the single-step pipeline keeps the same
    # `named_steps['classifier']` access as the other four models.
    balanced_rf = ImbPipeline([
        ('classifier', BalancedRandomForestClassifier(
            n_estimators=200,
            max_depth=15,
            min_samples_split=3,
            sampling_strategy='auto',
            replacement=True,
            random_state=42
        ))
    ])
    balanced_rf.fit(X_train, y_train)
    ensemble_models['Balanced RF'] = balanced_rf

    return ensemble_models

# Create advanced ensemble models
# `ensemble_models` maps a display name to each fitted ensemble pipeline
print("Creating advanced ensemble models...")
ensemble_models = create_advanced_ensemble(X_train, y_train)
print("Advanced ensemble models created successfully!")


In [None]:
# Threshold Optimization for Better Minority Class Performance
def optimize_threshold(model, X_test, y_test, model_name, thresholds=None):
    """Scan decision thresholds and return the one with the highest churn-class F1.

    Parameters
    ----------
    model
        Fitted classifier exposing ``predict_proba``.
    X_test, y_test
        Features and 0/1 labels on which the thresholds are scored.
    model_name : str
        Label used in the plot titles.
    thresholds : array-like of float, optional
        Candidate thresholds. Defaults to ``np.arange(0.1, 0.9, 0.05)``.

    Returns
    -------
    tuple
        ``(best_threshold, results_df)``. ``results_df`` has one row per
        threshold with columns ``threshold``, ``accuracy``, ``f1_score``,
        ``minority_precision`` and ``minority_recall``. ``best_threshold``
        stays at 0.5 if no candidate reaches an F1 above 0.

    Raises
    ------
    ValueError
        If ``thresholds`` is given but empty.

    Notes
    -----
    The threshold is selected on the same data that is passed in. If that is
    the final test set, metrics reported at the chosen threshold are
    optimistically biased; pass a validation split for an unbiased estimate.
    """
    from sklearn.metrics import precision_recall_fscore_support

    # Get prediction probabilities
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    # Try different thresholds
    if thresholds is None:
        thresholds = np.arange(0.1, 0.9, 0.05)
    else:
        thresholds = np.asarray(thresholds, dtype=float)
        if thresholds.size == 0:
            raise ValueError("thresholds must contain at least one value")

    best_threshold = 0.5
    best_f1 = 0

    results = []

    for threshold in thresholds:
        y_pred_thresh = (y_pred_proba >= threshold).astype(int)

        accuracy = accuracy_score(y_test, y_pred_thresh)

        # Per-class scores in one call. labels=[0, 1] guarantees that index 1 is
        # the churn class even when a threshold predicts a single class, and
        # zero_division=0 makes the 0-score in that case explicit.
        precision, recall, fscore, _ = precision_recall_fscore_support(
            y_test, y_pred_thresh, labels=[0, 1], average=None, zero_division=0
        )
        f1 = fscore[1]

        results.append({
            'threshold': threshold,
            'accuracy': accuracy,
            'f1_score': f1,
            'minority_precision': precision[1],  # Churn class
            'minority_recall': recall[1]         # Churn class
        })

        # Update best threshold based on F1 score
        if f1 > best_f1:
            best_f1 = f1
            best_threshold = threshold

    # Convert to DataFrame for easy analysis
    results_df = pd.DataFrame(results)

    # Plot threshold analysis
    fig, (ax_scores, ax_minority, ax_pr) = plt.subplots(1, 3, figsize=(12, 4))
    best_label = f'Best Threshold: {best_threshold:.2f}'

    ax_scores.plot(results_df['threshold'], results_df['accuracy'], 'b-', label='Accuracy')
    ax_scores.plot(results_df['threshold'], results_df['f1_score'], 'r-', label='F1 Score')
    ax_scores.axvline(x=best_threshold, color='g', linestyle='--', label=best_label)
    ax_scores.set_xlabel('Threshold')
    ax_scores.set_ylabel('Score')
    ax_scores.set_title(f'{model_name} - Accuracy vs F1')
    ax_scores.legend()
    ax_scores.grid(True)

    ax_minority.plot(results_df['threshold'], results_df['minority_precision'], 'b-', label='Precision')
    ax_minority.plot(results_df['threshold'], results_df['minority_recall'], 'r-', label='Recall')
    ax_minority.axvline(x=best_threshold, color='g', linestyle='--', label=best_label)
    ax_minority.set_xlabel('Threshold')
    ax_minority.set_ylabel('Score')
    ax_minority.set_title(f'{model_name} - Minority Class Performance')
    ax_minority.legend()
    ax_minority.grid(True)

    # Precision-Recall curve (precision_recall_curve comes from the notebook's import cell)
    precision_curve, recall_curve, _ = precision_recall_curve(y_test, y_pred_proba)
    ax_pr.plot(recall_curve, precision_curve, 'b-')
    ax_pr.set_xlabel('Recall')
    ax_pr.set_ylabel('Precision')
    ax_pr.set_title(f'{model_name} - Precision-Recall Curve')
    ax_pr.grid(True)

    fig.tight_layout()
    plt.show()

    return best_threshold, results_df

# Optimize thresholds for ensemble models
print("Optimizing thresholds for ensemble models...")
optimized_thresholds = {}

# NOTE(review): thresholds are selected on X_test / y_test, the same split the
# later evaluation cell scores on - confirm this is intended.
for name, model in ensemble_models.items():
    print(f"\nOptimizing threshold for {name}...")
    # thresh_results is overwritten on every pass; only the chosen threshold is kept per model
    best_thresh, thresh_results = optimize_threshold(model, X_test, y_test, name)
    optimized_thresholds[name] = best_thresh
    print(f"Best threshold for {name}: {best_thresh:.3f}")


In [None]:
# Comprehensive Evaluation of Ensemble Models
def evaluate_ensemble_models(ensemble_models, optimized_thresholds, X_test, y_test):
    """Evaluate ensemble models at the default and at their optimized thresholds.

    Parameters
    ----------
    ensemble_models : dict
        Display name -> fitted model exposing ``predict`` and ``predict_proba``.
    optimized_thresholds : dict
        Display name -> decision threshold to apply to the churn probability.
    X_test, y_test
        Evaluation features and 0/1 labels.

    Returns
    -------
    dict
        Display name -> dict with keys ``'Accuracy (Default)'``,
        ``'F1-Score (Default)'``, ``'Accuracy (Optimized)'``,
        ``'F1-Score (Optimized)'``, ``'ROC-AUC'``, ``'Churn Precision'``,
        ``'Churn Recall'``, ``'Optimal Threshold'``,
        ``'Predictions (Optimized)'`` and ``'Probabilities'``. Empty when no
        models are given (no figure is drawn in that case).

    The figure is four columns wide: one ROC panel per model, then one
    precision-recall panel per model, then two bar charts. The row count
    follows the number of models; five models give the 3x4 grid.
    """
    from sklearn.metrics import precision_recall_fscore_support

    ensemble_results = {}
    n_models = len(ensemble_models)
    if n_models == 0:
        return ensemble_results

    n_cols = 4
    n_panels = 2 * n_models + 2  # ROC + PR per model, plus two comparison charts
    n_rows = (n_panels + n_cols - 1) // n_cols

    plt.figure(figsize=(20, 4 * n_rows))

    for i, (name, model) in enumerate(ensemble_models.items()):
        # Get predictions with default threshold
        y_pred_default = model.predict(X_test)
        y_pred_proba = model.predict_proba(X_test)[:, 1]

        # Get predictions with optimized threshold
        optimal_threshold = optimized_thresholds[name]
        y_pred_optimized = (y_pred_proba >= optimal_threshold).astype(int)

        # Default threshold metrics
        accuracy_default = accuracy_score(y_test, y_pred_default)
        f1_default = f1_score(y_test, y_pred_default)
        roc_auc = roc_auc_score(y_test, y_pred_proba)

        # Optimized threshold metrics
        accuracy_optimized = accuracy_score(y_test, y_pred_optimized)
        f1_optimized = f1_score(y_test, y_pred_optimized)

        # Detailed metrics for minority class (optimized threshold).
        # labels=[0, 1] keeps index 1 bound to the churn class even if only one
        # class is predicted.
        precision, recall, _, _ = precision_recall_fscore_support(
            y_test, y_pred_optimized, labels=[0, 1], average=None, zero_division=0
        )

        ensemble_results[name] = {
            'Accuracy (Default)': accuracy_default,
            'F1-Score (Default)': f1_default,
            'Accuracy (Optimized)': accuracy_optimized,
            'F1-Score (Optimized)': f1_optimized,
            'ROC-AUC': roc_auc,
            'Churn Precision': precision[1],
            'Churn Recall': recall[1],
            'Optimal Threshold': optimal_threshold,
            'Predictions (Optimized)': y_pred_optimized,
            'Probabilities': y_pred_proba
        }

        # Plot ROC curves (slots 1 .. n_models)
        plt.subplot(n_rows, n_cols, i + 1)
        fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
        plt.plot(fpr, tpr, label=f'{name} (AUC = {roc_auc:.3f})')
        plt.plot([0, 1], [0, 1], 'k--')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title(f'ROC - {name}')
        plt.legend()
        plt.grid(True)

        # Plot Precision-Recall curves (slots n_models+1 .. 2*n_models)
        plt.subplot(n_rows, n_cols, n_models + i + 1)
        precision_curve, recall_curve, _ = precision_recall_curve(y_test, y_pred_proba)
        plt.plot(recall_curve, precision_curve)
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title(f'PR Curve - {name}')
        plt.grid(True)

    # The result dicts also hold arrays, so the transposed frame is object-typed;
    # cast the selected scalar columns to float before plotting.
    summary = pd.DataFrame(ensemble_results).T

    # Comparison plot
    plt.subplot(n_rows, n_cols, 2 * n_models + 1)
    metrics_df = summary[['Accuracy (Optimized)', 'F1-Score (Optimized)', 'ROC-AUC']].astype(float)
    metrics_df.plot(kind='bar', ax=plt.gca())
    plt.title('Ensemble Model Comparison')
    plt.ylabel('Score')
    plt.xticks(rotation=45)
    plt.legend()
    plt.grid(True)

    # Minority class performance comparison
    plt.subplot(n_rows, n_cols, 2 * n_models + 2)
    minority_df = summary[['Churn Precision', 'Churn Recall']].astype(float)
    minority_df.plot(kind='bar', ax=plt.gca())
    plt.title('Minority Class Performance')
    plt.ylabel('Score')
    plt.xticks(rotation=45)
    plt.legend()
    plt.grid(True)

    plt.tight_layout()
    plt.show()

    return ensemble_results

# Evaluate ensemble models
ensemble_results = evaluate_ensemble_models(ensemble_models, optimized_thresholds, X_test, y_test)

# Print detailed results
print("\n" + "="*80)
print("ENSEMBLE MODELS PERFORMANCE COMPARISON")
print("="*80)

for name, metrics in ensemble_results.items():
    print(f"\n{name}:")
    print(f"  Accuracy (Default):    {metrics['Accuracy (Default)']:.4f}")
    print(f"  Accuracy (Optimized):  {metrics['Accuracy (Optimized)']:.4f}")
    print(f"  F1-Score (Default):    {metrics['F1-Score (Default)']:.4f}")
    print(f"  F1-Score (Optimized):  {metrics['F1-Score (Optimized)']:.4f}")
    print(f"  ROC-AUC:              {metrics['ROC-AUC']:.4f}")
    print(f"  Churn Precision:      {metrics['Churn Precision']:.4f}")
    print(f"  Churn Recall:         {metrics['Churn Recall']:.4f}")
    print(f"  Optimal Threshold:    {metrics['Optimal Threshold']:.3f}")


In [None]:
# Best Ensemble Model Selection and Final Analysis
def select_best_ensemble_model(ensemble_results, models=None, X_eval=None, y_eval=None):
    """Rank the ensembles by a weighted composite score and report on the winner.

    The composite score is
    ``0.40 * ROC-AUC + 0.35 * Churn Recall + 0.25 * F1-Score (Optimized)``.

    Parameters
    ----------
    ensemble_results : dict
        Output of ``evaluate_ensemble_models``.
    models : dict, optional
        Name -> fitted model. Defaults to the notebook global ``ensemble_models``.
    X_eval, y_eval : optional
        Features and true labels for the confusion matrices and the report.
        Default to the notebook globals ``X_test`` and ``y_test``.

    Returns
    -------
    tuple
        ``(best_model_name, best_model, best_metrics, composite_scores)`` where
        ``composite_scores`` maps every model name to its score.

    Raises
    ------
    ValueError
        If ``ensemble_results`` is empty.
    """
    if not ensemble_results:
        raise ValueError("select_best_ensemble_model() needs at least one evaluated model")

    # Explicit arguments win; otherwise use the notebook globals this function
    # previously read implicitly, so existing one-argument calls keep working.
    if models is None:
        models = ensemble_models
    if X_eval is None:
        X_eval = X_test
    if y_eval is None:
        y_eval = y_test

    # Calculate composite score (weighted combination of metrics)
    # Weight: ROC-AUC (40%), Churn Recall (35%), F1-Score (25%)
    # Prioritize minority class recall for business impact
    composite_scores = {
        name: (0.40 * metrics['ROC-AUC'] +
               0.35 * metrics['Churn Recall'] +
               0.25 * metrics['F1-Score (Optimized)'])
        for name, metrics in ensemble_results.items()
    }

    # Find best model
    best_model_name = max(composite_scores.keys(), key=lambda x: composite_scores[x])
    best_model = models[best_model_name]
    best_metrics = ensemble_results[best_model_name]

    # Single backslashes below: the doubled form printed a literal "\n"
    print("\n" + "="*80)
    print(f"BEST ENSEMBLE MODEL: {best_model_name}")
    print("="*80)
    print(f"Composite Score: {composite_scores[best_model_name]:.4f}")
    print("\nPerformance Metrics:")
    print(f"  Accuracy (Optimized):  {best_metrics['Accuracy (Optimized)']:.4f}")
    print(f"  F1-Score (Optimized):  {best_metrics['F1-Score (Optimized)']:.4f}")
    print(f"  ROC-AUC:              {best_metrics['ROC-AUC']:.4f}")
    print(f"  Churn Precision:      {best_metrics['Churn Precision']:.4f}")
    print(f"  Churn Recall:         {best_metrics['Churn Recall']:.4f}")
    print(f"  Optimal Threshold:    {best_metrics['Optimal Threshold']:.3f}")

    # Detailed classification report
    print("\nDetailed Classification Report (Optimized Threshold):")
    print(classification_report(y_eval, best_metrics['Predictions (Optimized)'],
                                target_names=['No Churn', 'Churn']))

    # Confusion matrices comparison
    fig, axes = plt.subplots(1, 2, figsize=(15, 6))

    # Default threshold confusion matrix
    y_pred_default = best_model.predict(X_eval)
    cm_default = confusion_matrix(y_eval, y_pred_default)
    sns.heatmap(cm_default, annot=True, fmt='d', cmap='Blues',
                xticklabels=['No Churn', 'Churn'],
                yticklabels=['No Churn', 'Churn'], ax=axes[0])
    axes[0].set_title(f'{best_model_name} - Default Threshold (0.5)')
    axes[0].set_ylabel('True Label')
    axes[0].set_xlabel('Predicted Label')

    # Optimized threshold confusion matrix
    cm_optimized = confusion_matrix(y_eval, best_metrics['Predictions (Optimized)'])
    sns.heatmap(cm_optimized, annot=True, fmt='d', cmap='Greens',
                xticklabels=['No Churn', 'Churn'],
                yticklabels=['No Churn', 'Churn'], ax=axes[1])
    axes[1].set_title(f'{best_model_name} - Optimized Threshold ({best_metrics["Optimal Threshold"]:.3f})')
    axes[1].set_ylabel('True Label')
    axes[1].set_xlabel('Predicted Label')

    plt.tight_layout()
    plt.show()

    return best_model_name, best_model, best_metrics, composite_scores

# Select best ensemble model
best_ensemble_name, best_ensemble_model, best_ensemble_metrics, composite_scores = select_best_ensemble_model(ensemble_results)

# Print all composite scores, highest first
# (single backslash: the doubled form printed a literal "\n" instead of a blank line)
print("\nComposite Scores Ranking:")
for name, score in sorted(composite_scores.items(), key=lambda x: x[1], reverse=True):
    print(f"  {name:<20}: {score:.4f}")
