# Customer Churn Prediction with MLflow Experiment Tracking

This notebook demonstrates end-to-end machine learning experiment tracking using MLflow for customer churn prediction.

## Features:
- Comprehensive experiment tracking
- Model comparison and selection
- Automated model registry
- Reproducible experiments
- Detailed performance analysis

## 1. Setup MLflow Environment

In [None]:
# Install MLflow and dependencies
import subprocess
import sys

def install_package(package):
    """Install `package` into the running interpreter's environment via pip.

    pip is invoked as ``<current python> -m pip install <package>`` so the
    package lands in the same environment the kernel is using. A one-line
    success or failure message is printed; a failing pip exit status is
    reported rather than raised.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError:
        # Non-zero pip exit status: report it and let the notebook continue.
        print(f"Failed to install {package}")
    else:
        print(f"Successfully installed {package}")

# Install required packages (pip distribution names)
packages = ['mlflow', 'xgboost', 'scikit-learn', 'pandas', 'numpy', 'matplotlib', 'seaborn']

# Distribution names whose importable module name differs from the pip name.
# 'scikit-learn' is imported as 'sklearn'; probing 'scikit_learn' always raises
# ImportError, which made this cell re-run pip on every execution.
import_names = {'scikit-learn': 'sklearn'}

for package in packages:
    # Fall back to the usual "dashes become underscores" convention.
    module_name = import_names.get(package, package.replace('-', '_'))
    try:
        __import__(module_name)
        print(f"✓ {package} is available")
    except ImportError:
        print(f"Installing {package}...")
        install_package(package)

In [None]:
# Import libraries
import mlflow
import mlflow.sklearn
import mlflow.xgboost
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from datetime import datetime
import os
import joblib

# Silence library warnings so the cell output stays readable
warnings.filterwarnings('ignore')

# Use a local file-based tracking store and open a new experiment whose name
# carries the current timestamp
mlflow.set_tracking_uri("./mlruns")
run_timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
experiment_name = f"customer_churn_prediction_{run_timestamp}"
mlflow.set_experiment(experiment_name)

# Echo the active configuration
tracking_uri = mlflow.get_tracking_uri()
print(f"MLflow Tracking URI: {tracking_uri}")
print(f"Experiment Name: {experiment_name}")
print(f"MLflow Version: {mlflow.__version__}")

## 2. Data Loading and Initial Exploration

In [None]:
# Start MLflow run for data exploration
with mlflow.start_run(run_name="data_exploration") as run:
    # Load dataset; `df` is reused by the later cells
    data_path = "./data/customer_churn.csv"
    df = pd.read_csv(data_path)

    print(f"Dataset shape: {df.shape}")
    print("\nFirst 5 rows:")
    print(df.head())

    # Log dataset characteristics
    mlflow.log_param("dataset_path", data_path)
    mlflow.log_metric("total_samples", len(df))
    mlflow.log_metric("total_features", len(df.columns))
    mlflow.log_metric("missing_values_total", df.isnull().sum().sum())

    # Target distribution
    churn_distribution = df['Churn'].value_counts()
    churn_rate = (df['Churn'].sum() / len(df)) * 100

    # Count each class by comparison instead of looking labels up in
    # `churn_distribution`: a label lookup raises KeyError when a class is
    # absent from the data, whereas a comparison simply yields 0. The
    # comparison matches both integer (0/1) and float (0.0/1.0) encodings.
    churn_count = int((df['Churn'] == 1).sum())
    no_churn_count = int((df['Churn'] == 0).sum())

    mlflow.log_metric("churn_rate_percent", churn_rate)
    mlflow.log_metric("churn_count", churn_count)
    mlflow.log_metric("no_churn_count", no_churn_count)

    # Column information
    numerical_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    categorical_cols = df.select_dtypes(include=['object']).columns.tolist()

    mlflow.log_param("numerical_columns", numerical_cols)
    mlflow.log_param("categorical_columns", categorical_cols)
    mlflow.log_metric("numerical_features_count", len(numerical_cols))
    mlflow.log_metric("categorical_features_count", len(categorical_cols))

    print(f"\nDataset info logged to MLflow run: {run.info.run_id}")
    print(f"Churn rate: {churn_rate:.2f}%")
    print(f"Numerical columns: {len(numerical_cols)}")
    print(f"Categorical columns: {len(categorical_cols)}")

## 3. Exploratory Data Analysis with MLflow Logging

In [None]:
# Start MLflow run for EDA
with mlflow.start_run(run_name="exploratory_data_analysis") as run:
    # Set style for better plots
    plt.style.use('default')
    sns.set_palette("husl")

    # Human-readable names for the target values. Labels are always derived
    # from the actual values being plotted rather than assumed by position.
    churn_labels = {0: 'No Churn', 1: 'Churn'}

    # Create comprehensive EDA plots
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    fig.suptitle('Customer Churn Dataset - Exploratory Data Analysis', fontsize=16)

    # 1. Churn distribution
    # value_counts() orders by frequency, so the first wedge is whichever class
    # is larger. Sort by class value and build the labels from the index so the
    # wedges cannot be mislabelled when churners are the majority.
    churn_counts = df['Churn'].value_counts().sort_index()
    pie_labels = [churn_labels.get(value, str(value)) for value in churn_counts.index]
    axes[0, 0].pie(churn_counts, labels=pie_labels, autopct='%1.1f%%')
    axes[0, 0].set_title('Churn Distribution')

    # 2-6. Feature distributions by churn
    features_to_plot = ['Age', 'Total Spend', 'Usage Frequency', 'Support Calls', 'Payment Delay']
    positions = [(0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]

    for feature, pos in zip(features_to_plot, positions):
        if feature in df.columns:
            sns.boxplot(data=df, x='Churn', y=feature, ax=axes[pos])
            axes[pos].set_title(f'{feature} Distribution by Churn')
        else:
            # Hide the panel instead of leaving an empty set of axes behind.
            axes[pos].set_visible(False)

    plt.tight_layout()

    # Save and log the plot
    eda_plot_path = "eda_overview.png"
    plt.savefig(eda_plot_path, dpi=300, bbox_inches='tight')
    mlflow.log_artifact(eda_plot_path)
    plt.show()

    # Categorical variables analysis
    categorical_features = ['Gender', 'Subscription Type', 'Contract Length']
    available_categorical = [col for col in categorical_features if col in df.columns]

    if available_categorical:
        fig, axes = plt.subplots(1, len(available_categorical), figsize=(6*len(available_categorical), 6))
        if len(available_categorical) == 1:
            # plt.subplots returns a bare Axes for a single panel; wrap it so
            # the indexing below works uniformly.
            axes = [axes]

        fig.suptitle('Categorical Variables vs Churn', fontsize=16)

        for i, col in enumerate(available_categorical):
            # Row-normalised crosstab: percentage of each churn value per category
            churn_crosstab = pd.crosstab(df[col], df['Churn'], normalize='index') * 100
            churn_crosstab.plot(kind='bar', ax=axes[i], rot=45)
            axes[i].set_title(f'{col} vs Churn Rate (%)')
            axes[i].set_ylabel('Percentage')
            # Legend entries follow the crosstab's own column order.
            axes[i].legend([churn_labels.get(value, str(value)) for value in churn_crosstab.columns])

        plt.tight_layout()

        # Save and log categorical analysis
        categorical_plot_path = "categorical_analysis.png"
        plt.savefig(categorical_plot_path, dpi=300, bbox_inches='tight')
        mlflow.log_artifact(categorical_plot_path)
        plt.show()

    # Correlation heatmap
    plt.figure(figsize=(12, 10))
    correlation_matrix = df.select_dtypes(include=[np.number]).corr()
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0,
                square=True, linewidths=0.1, fmt='.2f')
    plt.title('Feature Correlation Heatmap')

    # Save and log correlation heatmap
    correlation_plot_path = "correlation_heatmap.png"
    plt.savefig(correlation_plot_path, dpi=300, bbox_inches='tight')
    mlflow.log_artifact(correlation_plot_path)
    plt.show()

    # Log correlation statistics
    target_correlations = correlation_matrix['Churn'].abs().sort_values(ascending=False)
    # Remove the target's self-correlation by label (not by assuming it sorts
    # first) and ignore undefined correlations, e.g. from constant columns.
    feature_correlations = target_correlations.drop('Churn').dropna()

    print(f"EDA plots logged to MLflow run: {run.info.run_id}")
    if not feature_correlations.empty:
        most_correlated_feature = feature_correlations.index[0]
        highest_correlation = feature_correlations.iloc[0]
        mlflow.log_metric("highest_correlation_with_churn", highest_correlation)
        mlflow.log_param("most_correlated_feature", most_correlated_feature)
        print(f"Most correlated feature with churn: {most_correlated_feature} ({highest_correlation:.3f})")
    else:
        print("No numeric feature has a defined correlation with churn")

## 4. Data Preprocessing Pipeline with Tracking

In [None]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer

# Start MLflow run for preprocessing
with mlflow.start_run(run_name="data_preprocessing") as run:
    def preprocess_data_with_tracking(df):
        """Clean, scale and encode the churn dataset, logging each step to MLflow.

        Rows with missing values are dropped, `CustomerID` is removed when
        present, the known numerical columns are standardised and the known
        categorical columns are one-hot encoded (first level dropped). Columns
        not in either list are discarded by the ColumnTransformer.

        Args:
            df: Raw dataset; must contain the `Churn` target column.

        Returns:
            Tuple of (processed feature DataFrame, target Series, fitted
            ColumnTransformer, list of output feature names). The feature
            frame and the target share the same index.

        Raises:
            KeyError: If `df` has no `Churn` column.
        """

        # Log preprocessing parameters
        mlflow.log_param("preprocessing_strategy", "StandardScaler + OneHotEncoder")
        mlflow.log_param("missing_value_strategy", "drop_rows")

        # Make a copy and handle missing values
        processed_df = df.copy()
        initial_shape = processed_df.shape

        # Drop rows with missing values
        processed_df = processed_df.dropna()
        final_shape = processed_df.shape

        mlflow.log_metric("rows_dropped", initial_shape[0] - final_shape[0])
        mlflow.log_metric("final_sample_count", final_shape[0])

        # Remove CustomerID if present
        if 'CustomerID' in processed_df.columns:
            processed_df = processed_df.drop('CustomerID', axis=1)
            mlflow.log_param("removed_customer_id", True)

        # Define column types
        numerical_cols = ['Age', 'Tenure', 'Usage Frequency', 'Support Calls',
                         'Payment Delay', 'Total Spend', 'Last Interaction']
        categorical_cols = ['Gender', 'Subscription Type', 'Contract Length']
        target_col = 'Churn'

        # Filter existing columns
        numerical_cols = [col for col in numerical_cols if col in processed_df.columns]
        categorical_cols = [col for col in categorical_cols if col in processed_df.columns]

        mlflow.log_param("numerical_features", numerical_cols)
        mlflow.log_param("categorical_features", categorical_cols)

        # Separate features and target
        X = processed_df.drop(target_col, axis=1)
        y = processed_df[target_col]

        # Create preprocessing pipeline
        numerical_transformer = StandardScaler()
        categorical_transformer = OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore')

        preprocessor = ColumnTransformer(
            transformers=[
                ('num', numerical_transformer, numerical_cols),
                ('cat', categorical_transformer, categorical_cols)
            ]
        )

        # Fit and transform
        # NOTE(review): the transformer is fitted on every row, before the
        # train/test split performed later in the notebook, so scaling
        # statistics include the test rows. Consider fitting on the training
        # split only.
        X_processed = preprocessor.fit_transform(X)

        # Get feature names
        try:
            categorical_feature_names = preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_cols)
            all_feature_names = list(numerical_cols) + list(categorical_feature_names)
        except Exception as exc:
            # Best-effort fallback to generic names. `Exception` (not a bare
            # except) so KeyboardInterrupt/SystemExit still propagate, and the
            # reason is reported instead of being silently swallowed.
            print(f"Could not derive encoded feature names ({exc}); using generic names")
            all_feature_names = list(numerical_cols) + [f"cat_{i}" for i in range(X_processed.shape[1] - len(numerical_cols))]

        # Keep the row labels of X so the features stay aligned with `y`;
        # without `index=` the frame gets a fresh 0..n-1 index that no longer
        # matches `y` once dropna() has removed rows.
        X_processed_df = pd.DataFrame(X_processed, columns=all_feature_names, index=X.index)

        # Log preprocessing results
        mlflow.log_metric("original_features", X.shape[1])
        mlflow.log_metric("processed_features", X_processed_df.shape[1])
        mlflow.log_metric("feature_expansion_ratio", X_processed_df.shape[1] / X.shape[1])

        # Save preprocessor
        preprocessor_path = "preprocessor.joblib"
        joblib.dump(preprocessor, preprocessor_path)
        mlflow.log_artifact(preprocessor_path)

        return X_processed_df, y, preprocessor, all_feature_names

    # Apply preprocessing
    X_processed, y, preprocessor, feature_names = preprocess_data_with_tracking(df)

    print("Preprocessing completed and logged to MLflow!")
    print(f"Features shape: {X_processed.shape}")
    print(f"Target shape: {y.shape}")
    print(f"Feature names: {len(feature_names)} features")

    # Log target distribution after preprocessing
    final_churn_rate = (y.sum() / len(y)) * 100
    mlflow.log_metric("final_churn_rate_percent", final_churn_rate)

    print(f"Final churn rate: {final_churn_rate:.2f}%")

## 5. Model Training with MLflow Experiments

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_score
import time

# Try to import XGBoost
try:
    from xgboost import XGBClassifier
    xgb_available = True
except ImportError:
    from sklearn.tree import DecisionTreeClassifier
    xgb_available = False
    print("XGBoost not available, using Decision Tree instead")

# Split data (stratified so both splits keep the overall churn ratio)
X_train, X_test, y_train, y_test = train_test_split(
    X_processed, y, test_size=0.2, random_state=42, stratify=y
)

print(f"Training set shape: {X_train.shape}")
print(f"Test set shape: {X_test.shape}")


def _configured_model(estimator_cls, params, **unlogged_kwargs):
    """Build an estimator from `params` and pair it with that same dict.

    The hyperparameters logged to MLflow are exactly the ones the estimator
    was constructed with, so the two cannot drift apart. `unlogged_kwargs` are
    constructor arguments that are applied but not logged.
    """
    return {
        'model': estimator_cls(**unlogged_kwargs, **params),
        'params': params,
    }


# Define models with hyperparameters.
# The values below are the ones previously logged by hand; they are expected
# to equal the library defaults where the estimator did not set them
# explicitly, so the fitted models should be unchanged.
models_config = {
    'Random Forest': _configured_model(
        RandomForestClassifier,
        {'n_estimators': 100, 'max_depth': None, 'random_state': 42},
        n_jobs=-1,
    ),
    'Logistic Regression': _configured_model(
        LogisticRegression,
        {'C': 1.0, 'max_iter': 1000, 'random_state': 42},
    ),
}

if xgb_available:
    models_config['XGBoost'] = _configured_model(
        XGBClassifier,
        {'n_estimators': 100, 'max_depth': 6, 'random_state': 42},
        eval_metric='logloss',
    )
else:
    models_config['Decision Tree'] = _configured_model(
        DecisionTreeClassifier,
        {'max_depth': 10, 'random_state': 42},
    )

# Train models with MLflow tracking
# Imported here so this cell's training loop is self-contained.
from sklearn.model_selection import cross_validate

model_results = {}

for model_name, config in models_config.items():
    run_slug = model_name.lower().replace(' ', '_')
    with mlflow.start_run(run_name=f"model_{run_slug}") as run:
        print(f"\nTraining {model_name}...")

        # Log model parameters
        mlflow.log_params(config['params'])
        mlflow.log_param("model_type", model_name)

        model = config['model']

        # Cross-validation: a single 5-fold pass scored on both metrics.
        # Calling cross_val_score once per metric refits every fold twice.
        cv_start_time = time.time()
        cv_results = cross_validate(
            model, X_train, y_train, cv=5, scoring=['accuracy', 'roc_auc']
        )
        cv_time = time.time() - cv_start_time
        cv_scores = cv_results['test_accuracy']
        cv_roc_scores = cv_results['test_roc_auc']

        # Log cross-validation metrics
        mlflow.log_metric("cv_accuracy_mean", cv_scores.mean())
        mlflow.log_metric("cv_accuracy_std", cv_scores.std())
        mlflow.log_metric("cv_roc_auc_mean", cv_roc_scores.mean())
        mlflow.log_metric("cv_roc_auc_std", cv_roc_scores.std())
        mlflow.log_metric("cv_time_seconds", cv_time)

        # Train final model. Only this fit is timed as "training time";
        # cross-validation time is reported separately above.
        start_time = time.time()
        model.fit(X_train, y_train)
        training_time = time.time() - start_time
        mlflow.log_metric("training_time_seconds", training_time)

        # Predictions (probability of the positive class for ROC-AUC)
        y_pred = model.predict(X_test)
        y_pred_proba = model.predict_proba(X_test)[:, 1]

        # Calculate metrics
        test_accuracy = accuracy_score(y_test, y_pred)
        test_roc_auc = roc_auc_score(y_test, y_pred_proba)
        test_precision = precision_score(y_test, y_pred)
        test_recall = recall_score(y_test, y_pred)
        test_f1 = f1_score(y_test, y_pred)

        # Log test metrics
        mlflow.log_metric("test_accuracy", test_accuracy)
        mlflow.log_metric("test_roc_auc", test_roc_auc)
        mlflow.log_metric("test_precision", test_precision)
        mlflow.log_metric("test_recall", test_recall)
        mlflow.log_metric("test_f1_score", test_f1)

        # Log model with the flavor matching its library
        if model_name == 'XGBoost' and xgb_available:
            mlflow.xgboost.log_model(model, "model")
        else:
            mlflow.sklearn.log_model(model, "model")

        # Feature importance (for tree-based models)
        if hasattr(model, 'feature_importances_'):
            feature_importance = pd.DataFrame({
                'feature': feature_names,
                'importance': model.feature_importances_
            }).sort_values('importance', ascending=False)

            # Save feature importance plot
            plt.figure(figsize=(10, 8))
            top_features = feature_importance.head(15)
            plt.barh(range(len(top_features)), top_features['importance'])
            plt.yticks(range(len(top_features)), top_features['feature'])
            plt.xlabel('Importance')
            plt.title(f'Top 15 Feature Importances - {model_name}')
            plt.gca().invert_yaxis()  # most important feature at the top

            importance_plot_path = f"feature_importance_{run_slug}.png"
            plt.savefig(importance_plot_path, dpi=300, bbox_inches='tight')
            mlflow.log_artifact(importance_plot_path)
            plt.show()
            # Release the figure so figures do not pile up across models.
            plt.close()

            # Log top feature importance
            mlflow.log_metric("top_feature_importance", top_features.iloc[0]['importance'])
            mlflow.log_param("most_important_feature", top_features.iloc[0]['feature'])

        # Store results
        model_results[model_name] = {
            'model': model,
            'test_accuracy': test_accuracy,
            'test_roc_auc': test_roc_auc,
            'test_precision': test_precision,
            'test_recall': test_recall,
            'test_f1': test_f1,
            'cv_accuracy_mean': cv_scores.mean(),
            'cv_roc_auc_mean': cv_roc_scores.mean(),
            'run_id': run.info.run_id
        }

        print(f"  CV Accuracy: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")
        print(f"  CV ROC-AUC: {cv_roc_scores.mean():.4f} (+/- {cv_roc_scores.std() * 2:.4f})")
        print(f"  Test Accuracy: {test_accuracy:.4f}")
        print(f"  Test ROC-AUC: {test_roc_auc:.4f}")
        print(f"  Training Time: {training_time:.2f} seconds")

print("\nAll models trained and logged to MLflow!")

## 6. Model Comparison and Selection

In [None]:
# Start MLflow run for model comparison
with mlflow.start_run(run_name="model_comparison") as run:

    # Display column -> key in each model's result dict (order = column order)
    metric_columns = {
        'Test Accuracy': 'test_accuracy',
        'Test ROC-AUC': 'test_roc_auc',
        'Test Precision': 'test_precision',
        'Test Recall': 'test_recall',
        'Test F1-Score': 'test_f1',
        'CV Accuracy': 'cv_accuracy_mean',
        'CV ROC-AUC': 'cv_roc_auc_mean',
    }

    # One row per trained model
    comparison_data = [
        {'Model': trained_name,
         **{column: trained_result[key] for column, key in metric_columns.items()}}
        for trained_name, trained_result in model_results.items()
    ]

    comparison_df = pd.DataFrame(comparison_data)
    print("Model Comparison:")
    print(comparison_df.round(4))

    # Persist the table and attach it to the run
    comparison_path = "model_comparison.csv"
    comparison_df.to_csv(comparison_path, index=False)
    mlflow.log_artifact(comparison_path)

    # 2x2 grid: three bar charts plus a precision/recall scatter
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    fig.suptitle('Model Performance Comparison', fontsize=16)

    bar_panels = (
        (axes[0, 0], 'Test Accuracy', 'Test Accuracy Comparison', 'Accuracy'),
        (axes[0, 1], 'Test ROC-AUC', 'Test ROC-AUC Comparison', 'ROC-AUC'),
        (axes[1, 1], 'Test F1-Score', 'Test F1-Score Comparison', 'F1-Score'),
    )
    for panel, metric_column, panel_title, panel_ylabel in bar_panels:
        panel.bar(comparison_df['Model'], comparison_df[metric_column])
        panel.set_title(panel_title)
        panel.set_ylabel(panel_ylabel)
        panel.tick_params(axis='x', rotation=45)

    # Precision vs Recall, each point annotated with its model name
    pr_panel = axes[1, 0]
    recall_values = comparison_df['Test Recall']
    precision_values = comparison_df['Test Precision']
    pr_panel.scatter(recall_values, precision_values, s=100)
    for point_label, recall_value, precision_value in zip(
            comparison_df['Model'], recall_values, precision_values):
        pr_panel.annotate(point_label,
                          (recall_value, precision_value),
                          xytext=(5, 5), textcoords='offset points')
    pr_panel.set_xlabel('Recall')
    pr_panel.set_ylabel('Precision')
    pr_panel.set_title('Precision vs Recall')

    plt.tight_layout()

    # Save comparison plot
    comparison_plot_path = "model_performance_comparison.png"
    plt.savefig(comparison_plot_path, dpi=300, bbox_inches='tight')
    mlflow.log_artifact(comparison_plot_path)
    plt.show()

    # The winner is the row with the highest test ROC-AUC
    test_roc_auc_column = comparison_df['Test ROC-AUC']
    best_row_label = test_roc_auc_column.idxmax()
    best_model_name = comparison_df.loc[best_row_label, 'Model']
    best_model_metrics = model_results[best_model_name]

    # Log best model information
    mlflow.log_param("best_model", best_model_name)
    mlflow.log_metric("best_model_accuracy", best_model_metrics['test_accuracy'])
    mlflow.log_metric("best_model_roc_auc", best_model_metrics['test_roc_auc'])
    mlflow.log_param("best_model_run_id", best_model_metrics['run_id'])

    print(f"\nBest Model: {best_model_name}")
    print(f"Best Model ROC-AUC: {best_model_metrics['test_roc_auc']:.4f}")
    print(f"Best Model Accuracy: {best_model_metrics['test_accuracy']:.4f}")

    # Spread of ROC-AUC across the compared models
    highest_roc_auc = test_roc_auc_column.max()
    lowest_roc_auc = test_roc_auc_column.min()
    mlflow.log_metric("models_compared", len(model_results))
    mlflow.log_metric("max_roc_auc", highest_roc_auc)
    mlflow.log_metric("min_roc_auc", lowest_roc_auc)
    mlflow.log_metric("roc_auc_range", highest_roc_auc - lowest_roc_auc)

## 7. Model Registration and Deployment Preparation

In [None]:
# Register the best model
model_name_registry = "customer_churn_predictor"

# Get the best model run
best_run_id = best_model_metrics['run_id']
model_uri = f"runs:/{best_run_id}/model"

try:
    # Register model.
    # mlflow.register_model() takes no `description` argument; passing one
    # raised TypeError, which the broad except below reported as a
    # registration failure, so nothing was ever registered. The description is
    # set through update_model_version() further down instead.
    model_version = mlflow.register_model(
        model_uri=model_uri,
        name=model_name_registry
    )

    print(f"Model registered: {model_name_registry}")
    print(f"Model version: {model_version.version}")
    print(f"Model run ID: {best_run_id}")

    # Transition model to Staging
    # NOTE(review): model version stages are marked deprecated in recent MLflow
    # releases in favour of aliases/tags - confirm against the installed version.
    client = mlflow.tracking.MlflowClient()
    client.transition_model_version_stage(
        name=model_name_registry,
        version=model_version.version,
        stage="Staging"
    )

    print(f"Model transitioned to Staging stage")

    # Add model description and tags
    client.update_model_version(
        name=model_name_registry,
        version=model_version.version,
        description=f"""
        Best performing customer churn prediction model.
        Model Type: {best_model_name}
        ROC-AUC: {best_model_metrics['test_roc_auc']:.4f}
        Accuracy: {best_model_metrics['test_accuracy']:.4f}
        Training Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
        """
    )

    # Set tags
    client.set_model_version_tag(
        name=model_name_registry,
        version=model_version.version,
        key="model_type",
        value=best_model_name
    )

    client.set_model_version_tag(
        name=model_name_registry,
        version=model_version.version,
        key="validation_status",
        value="validated"
    )

except Exception as e:
    # Registration is best-effort: tracking results remain usable without it.
    print(f"Model registration failed: {e}")
    print("This might be due to MLflow server configuration. Model tracking still works.")

# Create deployment preparation summary
deployment_info = {
    "model_name": best_model_name,
    "model_uri": model_uri,
    "run_id": best_run_id,
    "accuracy": best_model_metrics['test_accuracy'],
    "roc_auc": best_model_metrics['test_roc_auc'],
    "precision": best_model_metrics['test_precision'],
    "recall": best_model_metrics['test_recall'],
    "f1_score": best_model_metrics['test_f1']
}

# Save deployment info
import json
with open("deployment_info.json", "w") as f:
    json.dump(deployment_info, f, indent=2)

print("\nDeployment Information:")
for key, value in deployment_info.items():
    print(f"  {key}: {value}")

## 8. Experiment Analysis and Visualization

In [None]:
# Create comprehensive experiment summary
with mlflow.start_run(run_name="experiment_summary") as run:

    def _metric_or_zero(run_row, column):
        """Return a run's metric as a float, or 0.0 when it is absent or NaN."""
        value = run_row.get(column, 0)
        return float(value) if pd.notna(value) else 0.0

    def _format_score(values, reducer):
        """Format reducer(values) to four decimals, or 'N/A' when `values` is empty."""
        return f"{reducer(values):.4f}" if values else 'N/A'

    # Get experiment information
    experiment = mlflow.get_experiment_by_name(experiment_name)
    experiment_id = experiment.experiment_id

    # Get all runs from the experiment
    runs = mlflow.search_runs(experiment_ids=[experiment_id])

    print(f"Experiment Analysis for: {experiment_name}")
    print(f"Total runs: {len(runs)}")
    print(f"Experiment ID: {experiment_id}")

    # Select the per-model training runs: named "model_<name>" AND carrying a
    # test ROC-AUC. A plain substring match on 'model_' also picks up the
    # "model_comparison" run, which has no test metrics and would show up as a
    # bogus model with NaN scores.
    if {'tags.mlflow.runName', 'metrics.test_roc_auc'} <= set(runs.columns):
        run_names = runs['tags.mlflow.runName'].astype(str)
        model_runs = runs[run_names.str.startswith('model_') & runs['metrics.test_roc_auc'].notna()]
    else:
        model_runs = runs.iloc[0:0]

    # Collected up front so the summary metrics and the report below work even
    # when no model run was found.
    model_names = []
    accuracies = []
    roc_aucs = []
    training_times = []

    for _, run_row in model_runs.iterrows():
        run_name = run_row['tags.mlflow.runName']
        model_names.append(run_name.replace('model_', '').replace('_', ' ').title())
        accuracies.append(_metric_or_zero(run_row, 'metrics.test_accuracy'))
        roc_aucs.append(_metric_or_zero(run_row, 'metrics.test_roc_auc'))
        training_times.append(_metric_or_zero(run_row, 'metrics.training_time_seconds'))

    if not model_runs.empty:
        has_training_times = any(t > 0 for t in training_times)

        # Create performance dashboard
        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        fig.suptitle(f'Experiment Dashboard: {experiment_name}', fontsize=16)

        # Performance comparison (grouped bars: accuracy next to ROC-AUC)
        x_pos = np.arange(len(model_names))
        width = 0.35

        axes[0, 0].bar(x_pos - width/2, accuracies, width, label='Accuracy', alpha=0.8)
        axes[0, 0].bar(x_pos + width/2, roc_aucs, width, label='ROC-AUC', alpha=0.8)
        axes[0, 0].set_xlabel('Models')
        axes[0, 0].set_ylabel('Score')
        axes[0, 0].set_title('Model Performance Comparison')
        axes[0, 0].set_xticks(x_pos)
        axes[0, 0].set_xticklabels(model_names, rotation=45)
        axes[0, 0].legend()
        axes[0, 0].grid(True, alpha=0.3)

        # ROC-AUC distribution
        axes[0, 1].hist(roc_aucs, bins=min(5, len(roc_aucs)), alpha=0.7, edgecolor='black')
        axes[0, 1].axvline(np.mean(roc_aucs), color='red', linestyle='--', label=f'Mean: {np.mean(roc_aucs):.3f}')
        axes[0, 1].set_xlabel('ROC-AUC Score')
        axes[0, 1].set_ylabel('Frequency')
        axes[0, 1].set_title('ROC-AUC Score Distribution')
        axes[0, 1].legend()
        axes[0, 1].grid(True, alpha=0.3)

        # Training time comparison (if available)
        if has_training_times:
            axes[1, 0].bar(model_names, training_times)
            axes[1, 0].set_xlabel('Models')
            axes[1, 0].set_ylabel('Training Time (seconds)')
            axes[1, 0].set_title('Training Time Comparison')
            axes[1, 0].tick_params(axis='x', rotation=45)
            axes[1, 0].grid(True, alpha=0.3)
        else:
            axes[1, 0].text(0.5, 0.5, 'Training time\ndata not available',
                           ha='center', va='center', transform=axes[1, 0].transAxes)
            axes[1, 0].set_title('Training Time Comparison')

        # Model efficiency scatter plot (Performance vs Training Time)
        if has_training_times:
            axes[1, 1].scatter(training_times, roc_aucs, s=100, alpha=0.7)
            for i, model_label in enumerate(model_names):
                axes[1, 1].annotate(model_label, (training_times[i], roc_aucs[i]),
                                   xytext=(5, 5), textcoords='offset points')
            axes[1, 1].set_xlabel('Training Time (seconds)')
            axes[1, 1].set_ylabel('ROC-AUC Score')
            axes[1, 1].set_title('Model Efficiency (Performance vs Time)')
            axes[1, 1].grid(True, alpha=0.3)
        else:
            # Show performance ranking instead (best ROC-AUC first)
            sorted_indices = np.argsort(roc_aucs)[::-1]

            axes[1, 1].bar([model_names[i] for i in sorted_indices],
                          [roc_aucs[i] for i in sorted_indices])
            axes[1, 1].set_xlabel('Models (Ranked by Performance)')
            axes[1, 1].set_ylabel('ROC-AUC Score')
            axes[1, 1].set_title('Model Performance Ranking')
            axes[1, 1].tick_params(axis='x', rotation=45)
            axes[1, 1].grid(True, alpha=0.3)

        plt.tight_layout()

        # Save experiment dashboard
        dashboard_path = "experiment_dashboard.png"
        plt.savefig(dashboard_path, dpi=300, bbox_inches='tight')
        mlflow.log_artifact(dashboard_path)
        plt.show()

    # Log experiment summary metrics
    mlflow.log_metric("total_runs_in_experiment", len(runs))
    mlflow.log_metric("total_model_runs", len(model_runs))

    if roc_aucs:
        mlflow.log_metric("best_roc_auc_overall", max(roc_aucs))
        mlflow.log_metric("avg_roc_auc_overall", np.mean(roc_aucs))
        mlflow.log_metric("std_roc_auc_overall", np.std(roc_aucs))

    # Create final experiment report.
    # Scores are pre-formatted by _format_score: applying ':.4f' directly to an
    # 'N/A' string raises ValueError.
    report = f"""
    EXPERIMENT SUMMARY REPORT
    ========================
    
    Experiment: {experiment_name}
    Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
    
    Dataset Information:
    - Total samples: {len(df)}
    - Features after preprocessing: {len(feature_names)}
    - Churn rate: {(y.sum() / len(y) * 100):.2f}%
    
    Models Evaluated: {len(model_results)}
    
    Best Model: {best_model_name}
    - ROC-AUC: {best_model_metrics['test_roc_auc']:.4f}
    - Accuracy: {best_model_metrics['test_accuracy']:.4f}
    - Precision: {best_model_metrics['test_precision']:.4f}
    - Recall: {best_model_metrics['test_recall']:.4f}
    - F1-Score: {best_model_metrics['test_f1']:.4f}
    
    Model Performance Range:
    - Best ROC-AUC: {_format_score(roc_aucs, max)}
    - Worst ROC-AUC: {_format_score(roc_aucs, min)}
    - Average ROC-AUC: {_format_score(roc_aucs, np.mean)}
    
    Next Steps:
    1. Review model performance in MLflow UI
    2. Deploy best model to staging environment
    3. Set up monitoring and feedback loops
    4. Schedule model retraining
    """

    # Save report
    with open("experiment_report.txt", "w") as f:
        f.write(report)

    mlflow.log_artifact("experiment_report.txt")

    print(report)

    print(f"\n🎉 Experiment completed successfully!")
    print(f"\n📊 View results in MLflow UI:")
    print(f"   Run: mlflow ui --backend-store-uri ./mlruns")
    print(f"   Then open: http://localhost:5000")

    print(f"\n📁 Artifacts saved:")
    print(f"   - Model artifacts in MLflow")
    print(f"   - Preprocessor: preprocessor.joblib")
    print(f"   - Experiment report: experiment_report.txt")
    print(f"   - Deployment info: deployment_info.json")

## MLflow UI Instructions

To view your experiment results:

1. **Start MLflow UI**:
   ```bash
   mlflow ui --backend-store-uri ./mlruns
   ```

2. **Open browser**: Navigate to `http://localhost:5000`

3. **Explore experiments**:
   - Compare model performance
   - View artifacts and plots
   - Check model registry
   - Download models and preprocessors

4. **Model deployment**:
   ```python
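   # Load model for inference. The pyfunc loader works for every logged flavor;
   # mlflow.sklearn.load_model only works when the best run was logged with the
   # scikit-learn flavor (it fails for a model logged via mlflow.xgboost).
   model = mlflow.pyfunc.load_model(f"runs:/{best_run_id}/model")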
   
   # Load preprocessor
   import joblib
   preprocessor = joblib.load("preprocessor.joblib")
   ```

## Key Features Implemented:

✅ **Comprehensive Experiment Tracking**
✅ **Automated Model Comparison**
✅ **Artifact Management**
✅ **Model Registry Integration**
✅ **Performance Visualization**
✅ **Reproducible Experiments**
✅ **Deployment Preparation**