In [1]:
# Customer Churn Prediction - Model Training
# CodSoft ML Internship - Task 3
# Author: Chandan Kumar

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import (classification_report, confusion_matrix, 
                             roc_auc_score, roc_curve, accuracy_score,
                             f1_score, precision_score, recall_score)
import joblib
import json
import warnings
warnings.filterwarnings('ignore')

print("="*70)
print("CUSTOMER CHURN PREDICTION - MODEL TRAINING")
print("="*70)

CUSTOMER CHURN PREDICTION - MODEL TRAINING


In [3]:
# 1. LOAD PROCESSED DATA

print("\nüìÇ Loading processed dataset...")
df = pd.read_csv('../data/Churn_Modelling_processed.csv')

print(f"‚úÖ Dataset loaded: {df.shape}")

# Separate features and target
X = df.drop('Exited', axis=1)
y = df['Exited']

print(f"   Features shape: {X.shape}")
print(f"   Target shape: {y.shape}")
print(f"   Churn cases: {y.sum():,} ({(y.sum()/len(y)*100):.2f}%)")


üìÇ Loading processed dataset...
‚úÖ Dataset loaded: (10000, 12)
   Features shape: (10000, 11)
   Target shape: (10000,)
   Churn cases: 2,037 (20.37%)


In [4]:
# 2. TRAIN-TEST SPLIT

print("\n" + "="*70)
print("SPLITTING DATA")
print("="*70)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"\n‚úÖ Data split completed:")
print(f"   Training set: {X_train.shape[0]:,} samples")
print(f"   Test set: {X_test.shape[0]:,} samples")
print(f"   Train churn: {y_train.sum():,} ({(y_train.sum()/len(y_train)*100):.2f}%)")
print(f"   Test churn: {y_test.sum():,} ({(y_test.sum()/len(y_test)*100):.2f}%)")


SPLITTING DATA

‚úÖ Data split completed:
   Training set: 8,000 samples
   Test set: 2,000 samples
   Train churn: 1,630 (20.38%)
   Test churn: 407 (20.35%)


In [5]:
# 3. BASELINE MODELS

print("\n" + "="*70)
print("PHASE 1: BASELINE MODELS")
print("="*70)

results = {}

def train_evaluate_model(model, model_name, X_tr, y_tr, X_te, y_te):
    """Train and evaluate a model"""
    print(f"\nüîÑ Training {model_name}...")
    
    # Train
    model.fit(X_tr, y_tr)
    
    # Predictions
    y_pred = model.predict(X_te)
    y_pred_proba = model.predict_proba(X_te)[:, 1] if hasattr(model, 'predict_proba') else None
    
    # Metrics
    accuracy = accuracy_score(y_te, y_pred)
    precision = precision_score(y_te, y_pred)
    recall = recall_score(y_te, y_pred)
    f1 = f1_score(y_te, y_pred)
    roc_auc = roc_auc_score(y_te, y_pred_proba) if y_pred_proba is not None else None
    
    print(f"   Accuracy:  {accuracy:.4f}")
    print(f"   Precision: {precision:.4f}")
    print(f"   Recall:    {recall:.4f}")
    print(f"   F1-Score:  {f1:.4f}")
    if roc_auc:
        print(f"   ROC-AUC:   {roc_auc:.4f}")
    
    return {
        'model': model,
        'accuracy': float(accuracy),
        'precision': float(precision),
        'recall': float(recall),
        'f1_score': float(f1),
        'roc_auc': float(roc_auc) if roc_auc else None,
        'predictions': y_pred,
        'probabilities': y_pred_proba
    }

# Train baseline models
results['Logistic Regression (Baseline)'] = train_evaluate_model(
    LogisticRegression(random_state=42, max_iter=1000),
    "Logistic Regression",
    X_train, y_train, X_test, y_test
)

results['Random Forest (Baseline)'] = train_evaluate_model(
    RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=2),
    "Random Forest",
    X_train, y_train, X_test, y_test
)

results['Gradient Boosting (Baseline)'] = train_evaluate_model(
    GradientBoostingClassifier(n_estimators=100, random_state=42),
    "Gradient Boosting",
    X_train, y_train, X_test, y_test
)


PHASE 1: BASELINE MODELS

üîÑ Training Logistic Regression...
   Accuracy:  0.8080
   Precision: 0.5891
   Recall:    0.1867
   F1-Score:  0.2836
   ROC-AUC:   0.7748

üîÑ Training Random Forest...
   Accuracy:  0.8615
   Precision: 0.7708
   Recall:    0.4545
   F1-Score:  0.5719
   ROC-AUC:   0.8530

üîÑ Training Gradient Boosting...
   Accuracy:  0.8700
   Precision: 0.7928
   Recall:    0.4889
   F1-Score:  0.6049
   ROC-AUC:   0.8708


In [6]:
# 4. HYPERPARAMETER TUNING

print("\n" + "="*70)
print("PHASE 2: HYPERPARAMETER TUNING")
print("="*70)

# Logistic Regression Tuning
print("\nüîç Tuning Logistic Regression...")
lr_param_grid = {
    'C': [0.01, 0.1, 1, 10],
    'penalty': ['l2'],
    'solver': ['liblinear', 'lbfgs'],
    'class_weight': ['balanced', None]
}

lr_grid = GridSearchCV(
    LogisticRegression(random_state=42, max_iter=1000),
    lr_param_grid,
    cv=3,
    scoring='f1',
    n_jobs=2,
    verbose=1
)

lr_grid.fit(X_train, y_train)
print(f"‚úÖ Best params: {lr_grid.best_params_}")
print(f"‚úÖ Best CV F1: {lr_grid.best_score_:.4f}")

results['Logistic Regression (Tuned)'] = train_evaluate_model(
    lr_grid.best_estimator_,
    "Logistic Regression (Tuned)",
    X_train, y_train, X_test, y_test
)
results['Logistic Regression (Tuned)']['best_params'] = lr_grid.best_params_

# Random Forest Tuning
print("\nüîç Tuning Random Forest...")
rf_param_grid = {
    'n_estimators': [50, 100],
    'max_depth': [10, 20, None],
    'min_samples_split': [5, 10],
    'class_weight': ['balanced', None]
}

rf_grid = GridSearchCV(
    RandomForestClassifier(random_state=42, n_jobs=2),
    rf_param_grid,
    cv=3,
    scoring='f1',
    n_jobs=1,
    verbose=1
)

rf_grid.fit(X_train, y_train)
print(f"‚úÖ Best params: {rf_grid.best_params_}")
print(f"‚úÖ Best CV F1: {rf_grid.best_score_:.4f}")

results['Random Forest (Tuned)'] = train_evaluate_model(
    rf_grid.best_estimator_,
    "Random Forest (Tuned)",
    X_train, y_train, X_test, y_test
)
results['Random Forest (Tuned)']['best_params'] = rf_grid.best_params_

# Gradient Boosting Tuning
print("\nüîç Tuning Gradient Boosting...")
gb_param_grid = {
    'n_estimators': [50, 100],
    'learning_rate': [0.01, 0.1],
    'max_depth': [3, 5],
    'min_samples_split': [5, 10]
}

gb_grid = GridSearchCV(
    GradientBoostingClassifier(random_state=42),
    gb_param_grid,
    cv=3,
    scoring='f1',
    n_jobs=2,
    verbose=1
)

gb_grid.fit(X_train, y_train)
print(f"‚úÖ Best params: {gb_grid.best_params_}")
print(f"‚úÖ Best CV F1: {gb_grid.best_score_:.4f}")

results['Gradient Boosting (Tuned)'] = train_evaluate_model(
    gb_grid.best_estimator_,
    "Gradient Boosting (Tuned)",
    X_train, y_train, X_test, y_test
)
results['Gradient Boosting (Tuned)']['best_params'] = gb_grid.best_params_


PHASE 2: HYPERPARAMETER TUNING

üîç Tuning Logistic Regression...
Fitting 3 folds for each of 16 candidates, totalling 48 fits
‚úÖ Best params: {'C': 0.01, 'class_weight': 'balanced', 'penalty': 'l2', 'solver': 'lbfgs'}
‚úÖ Best CV F1: 0.4972

üîÑ Training Logistic Regression (Tuned)...
   Accuracy:  0.7185
   Precision: 0.3937
   Recall:    0.7101
   F1-Score:  0.5066
   ROC-AUC:   0.7781

üîç Tuning Random Forest...
Fitting 3 folds for each of 24 candidates, totalling 72 fits
‚úÖ Best params: {'class_weight': 'balanced', 'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 100}
‚úÖ Best CV F1: 0.6219

üîÑ Training Random Forest (Tuned)...
   Accuracy:  0.8375
   Precision: 0.5911
   Recall:    0.6536
   F1-Score:  0.6208
   ROC-AUC:   0.8612

üîç Tuning Gradient Boosting...
Fitting 3 folds for each of 16 candidates, totalling 48 fits
‚úÖ Best params: {'learning_rate': 0.1, 'max_depth': 5, 'min_samples_split': 10, 'n_estimators': 100}
‚úÖ Best CV F1: 0.5888

üîÑ Training Gr

In [7]:
# 5. MODEL COMPARISON & SELECTION

print("\n" + "="*70)
print("MODEL COMPARISON")
print("="*70)

# Compare models
comparison_data = []
for name, metrics in results.items():
    if 'Tuned' in name:
        comparison_data.append({
            'Model': name.replace(' (Tuned)', ''),
            'Accuracy': metrics['accuracy'],
            'Precision': metrics['precision'],
            'Recall': metrics['recall'],
            'F1-Score': metrics['f1_score'],
            'ROC-AUC': metrics['roc_auc']
        })

comparison_df = pd.DataFrame(comparison_data)
print("\nüìä Tuned Models Performance:")
print(comparison_df.to_string(index=False))

# Select best model based on F1-score
best_model_name = max([k for k in results.keys() if 'Tuned' in k], 
                       key=lambda x: results[x]['f1_score'])
best_model = results[best_model_name]['model']
best_metrics = results[best_model_name]

print(f"\nüèÜ BEST MODEL: {best_model_name}")
print(f"   Accuracy:  {best_metrics['accuracy']:.4f}")
print(f"   Precision: {best_metrics['precision']:.4f}")
print(f"   Recall:    {best_metrics['recall']:.4f}")
print(f"   F1-Score:  {best_metrics['f1_score']:.4f}")
print(f"   ROC-AUC:   {best_metrics['roc_auc']:.4f}")


MODEL COMPARISON

üìä Tuned Models Performance:
              Model  Accuracy  Precision   Recall  F1-Score  ROC-AUC
Logistic Regression    0.7185   0.393733 0.710074  0.506573 0.778072
      Random Forest    0.8375   0.591111 0.653563  0.620770 0.861214
  Gradient Boosting    0.8690   0.786561 0.488943  0.603030 0.864974

üèÜ BEST MODEL: Random Forest (Tuned)
   Accuracy:  0.8375
   Precision: 0.5911
   Recall:    0.6536
   F1-Score:  0.6208
   ROC-AUC:   0.8612


In [12]:
# 6. SAVE BEST MODEL

print("\nüíæ Saving best model...")
joblib.dump(best_model, '../models/churn_prediction_model.pkl')
print("‚úÖ Model saved: ../models/churn_prediction_model.pkl")

# Save all models in results
for name, result in results.items():
    clean_name = name.lower().replace(" ", "_").replace("(tuned)", "")
    filename = f"../models/{clean_name}_model.pkl"
    joblib.dump(result['model'], filename)
    print(f"‚úÖ Saved: {filename}")


üíæ Saving best model...
‚úÖ Model saved: ../models/churn_prediction_model.pkl
‚úÖ Saved: ../models/logistic_regression_(baseline)_model.pkl
‚úÖ Saved: ../models/random_forest_(baseline)_model.pkl
‚úÖ Saved: ../models/gradient_boosting_(baseline)_model.pkl
‚úÖ Saved: ../models/logistic_regression__model.pkl
‚úÖ Saved: ../models/random_forest__model.pkl
‚úÖ Saved: ../models/gradient_boosting__model.pkl


In [13]:
# 7. DETAILED EVALUATION

print("\n" + "="*70)
print("DETAILED EVALUATION OF BEST MODEL")
print("="*70)

y_pred_best = best_metrics['predictions']
y_proba_best = best_metrics['probabilities']

# Classification Report
print("\nüìã Classification Report:")
print(classification_report(y_test, y_pred_best, 
                           target_names=['Retained', 'Churned'],
                           digits=4))

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred_best)
print("\nüî¢ Confusion Matrix:")
print(f"   True Negatives (Retained correctly):  {cm[0][0]:,}")
print(f"   False Positives (False churn alert): {cm[0][1]:,}")
print(f"   False Negatives (Missed churners):   {cm[1][0]:,}")
print(f"   True Positives (Churners caught):    {cm[1][1]:,}")

# Calculate retention metrics
tn, fp, fn, tp = cm.ravel()
retention_rate = tn / (tn + fp)
churn_detection_rate = tp / (tp + fn)

print(f"\nüìä Business Metrics:")
print(f"   Retention Detection Rate: {retention_rate:.2%}")
print(f"   Churn Detection Rate: {churn_detection_rate:.2%}")
print(f"   False Alarm Rate: {fp / (fp + tn):.2%}")



DETAILED EVALUATION OF BEST MODEL

üìã Classification Report:
              precision    recall  f1-score   support

    Retained     0.9090    0.8845    0.8966      1593
     Churned     0.5911    0.6536    0.6208       407

    accuracy                         0.8375      2000
   macro avg     0.7501    0.7690    0.7587      2000
weighted avg     0.8443    0.8375    0.8405      2000


üî¢ Confusion Matrix:
   True Negatives (Retained correctly):  1,409
   False Positives (False churn alert): 184
   False Negatives (Missed churners):   141
   True Positives (Churners caught):    266

üìä Business Metrics:
   Retention Detection Rate: 88.45%
   Churn Detection Rate: 65.36%
   False Alarm Rate: 11.55%


In [14]:
# 8. FEATURE IMPORTANCE

print("\n" + "="*70)
print("FEATURE IMPORTANCE ANALYSIS")
print("="*70)

# Get feature importance (for tree-based models)
if hasattr(best_model, 'feature_importances_'):
    feature_importance = pd.DataFrame({
        'feature': X.columns,
        'importance': best_model.feature_importances_
    }).sort_values('importance', ascending=False)
    
    print("\nüî• Top 10 Most Important Features:")
    print(feature_importance.head(10).to_string(index=False))
    
    # Visualization
    plt.figure(figsize=(10, 6))
    top_features = feature_importance.head(10)
    plt.barh(top_features['feature'], top_features['importance'])
    plt.xlabel('Importance')
    plt.title('Top 10 Feature Importance')
    plt.gca().invert_yaxis()
    plt.tight_layout()
    plt.savefig('../images/feature_importance.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("\n‚úÖ Feature importance saved: ../images/feature_importance.png")


FEATURE IMPORTANCE ANALYSIS

üî• Top 10 Most Important Features:
          feature  importance
              Age    0.322651
    NumOfProducts    0.208108
          Balance    0.118420
  EstimatedSalary    0.081831
      CreditScore    0.077775
   IsActiveMember    0.052461
Geography_Germany    0.051374
           Tenure    0.044260
           Gender    0.021902
        HasCrCard    0.010630

‚úÖ Feature importance saved: ../images/feature_importance.png


In [15]:
# 9. VISUALIZATIONS

print("\nüìä Creating visualizations...")

# 1. Confusion Matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=['Retained', 'Churned'],
            yticklabels=['Retained', 'Churned'])
plt.title(f'Confusion Matrix - {best_model_name}\nF1-Score: {best_metrics["f1_score"]:.4f}')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.tight_layout()
plt.savefig('../images/confusion_matrix_churn.png', dpi=300, bbox_inches='tight')
plt.close()

# 2. Model Comparison
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

models = [m.replace(' (Tuned)', '') for m in results.keys() if 'Tuned' in m]
accuracy_scores = [results[m]['accuracy'] for m in results.keys() if 'Tuned' in m]
f1_scores = [results[m]['f1_score'] for m in results.keys() if 'Tuned' in m]

x = np.arange(len(models))
width = 0.35

axes[0].bar(x - width/2, accuracy_scores, width, label='Accuracy', alpha=0.8)
axes[0].bar(x + width/2, f1_scores, width, label='F1-Score', alpha=0.8)
axes[0].set_xlabel('Models')
axes[0].set_ylabel('Score')
axes[0].set_title('Model Performance Comparison')
axes[0].set_xticks(x)
axes[0].set_xticklabels(models, rotation=15, ha='right')
axes[0].legend()
axes[0].set_ylim([0, 1])
axes[0].grid(axis='y', alpha=0.3)

# ROC-AUC comparison
roc_scores = [results[m]['roc_auc'] for m in results.keys() if 'Tuned' in m]
colors = ['red' if m == best_model_name.replace(' (Tuned)', '') else 'skyblue' for m in models]
axes[1].bar(models, roc_scores, color=colors)
axes[1].set_ylabel('ROC-AUC Score')
axes[1].set_title('ROC-AUC Score Comparison')
axes[1].set_ylim([0, 1])
axes[1].set_xticklabels(models, rotation=15, ha='right')
for i, v in enumerate(roc_scores):
    axes[1].text(i, v + 0.01, f'{v:.4f}', ha='center', va='bottom')

plt.tight_layout()
plt.savefig('../images/model_comparison_churn.png', dpi=300, bbox_inches='tight')
plt.close()

# 3. ROC Curves
plt.figure(figsize=(10, 8))

for name in [k for k in results.keys() if 'Tuned' in k]:
    if results[name]['probabilities'] is not None:
        fpr, tpr, _ = roc_curve(y_test, results[name]['probabilities'])
        roc_auc = results[name]['roc_auc']
        plt.plot(fpr, tpr, lw=2, label=f'{name.replace(" (Tuned)", "")} (AUC = {roc_auc:.4f})')

plt.plot([0, 1], [0, 1], 'k--', lw=2, label='Random Guess')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves - Churn Prediction Models')
plt.legend(loc="lower right")
plt.grid(alpha=0.3)
plt.tight_layout()
plt.savefig('../images/roc_curves_churn.png', dpi=300, bbox_inches='tight')
plt.close()

print("‚úÖ All visualizations saved!")


üìä Creating visualizations...
‚úÖ All visualizations saved!


In [17]:
# 10. SAVE METRICS

print("\nüíæ Saving metrics...")

# Prepare results for JSON
json_results = {}
for name, metrics in results.items():
    json_results[name] = {
        'accuracy': metrics['accuracy'],
        'precision': metrics['precision'],
        'recall': metrics['recall'],
        'f1_score': metrics['f1_score'],
        'roc_auc': metrics['roc_auc']
    }
    if 'best_params' in metrics:
        json_results[name]['best_params'] = metrics['best_params']

# Save metrics
with open('../artifacts/training_metrics.json', 'w') as f:
    json.dump({
        'best_model': best_model_name,
        'all_results': json_results,
        'confusion_matrix': {
            'true_negatives': int(tn),
            'false_positives': int(fp),
            'false_negatives': int(fn),
            'true_positives': int(tp)
        },
        'business_metrics': {
            'retention_rate': float(retention_rate),
            'churn_detection_rate': float(churn_detection_rate)
        }
    }, f, indent=4)

print("‚úÖ Metrics saved: ../artifacts/training_metrics.json")

# Save classification report
with open('../artifacts/classification_report_churn.txt', 'w') as f:
    f.write("="*70 + "\n")
    f.write("CUSTOMER CHURN PREDICTION - MODEL PERFORMANCE\n")
    f.write("="*70 + "\n\n")
    f.write(f"Best Model: {best_model_name}\n\n")
    f.write("Classification Report:\n")
    f.write("="*70 + "\n")
    f.write(classification_report(y_test, y_pred_best, 
                                  target_names=['Retained', 'Churned'],
                                  digits=4))

print("‚úÖ Classification report saved!")


üíæ Saving metrics...
‚úÖ Metrics saved: ../artifacts/training_metrics.json
‚úÖ Classification report saved!


In [18]:
# 11. SUMMARY

print("\n" + "="*70)
print("‚úÖ TRAINING COMPLETED SUCCESSFULLY!")
print("="*70)

print("\nüìÅ Generated Files:")
print("   ‚úÖ ../models/churn_prediction_model.pkl")
print("   ‚úÖ ../artifacts/confusion_matrix_churn.png")
print("   ‚úÖ ../artifacts/model_comparison_churn.png")
print("   ‚úÖ ../artifacts/roc_curves_churn.png")
print("   ‚úÖ ../artifacts/feature_importance.png")
print("   ‚úÖ ../artifacts/training_metrics.json")
print("   artifacts/classification_report_churn.txt")
print("\nüèÜ Best Model Performance:")
print(f"   Model: {best_model_name}")
print(f"   Accuracy: {best_metrics['accuracy']:.4f}")
print(f"   F1-Score: {best_metrics['f1_score']:.4f}")
print(f"   ROC-AUC: {best_metrics['roc_auc']:.4f}")

print("\nüíº Business Impact:")
print(f"   Can identify {tp} out of {tp+fn} potential churners")
print(f"   Churn detection rate: {churn_detection_rate:.2%}")
print(f"   False alarm rate: {fp/(fp+tn):.2%}")

print("\nüöÄ Next Steps:")
print("   1. Test model with app.py")
print("   2. Use web interface for predictions")
print("   3. Deploy for customer retention campaigns")


‚úÖ TRAINING COMPLETED SUCCESSFULLY!

üìÅ Generated Files:
   ‚úÖ ../models/churn_prediction_model.pkl
   ‚úÖ ../artifacts/confusion_matrix_churn.png
   ‚úÖ ../artifacts/model_comparison_churn.png
   ‚úÖ ../artifacts/roc_curves_churn.png
   ‚úÖ ../artifacts/feature_importance.png
   ‚úÖ ../artifacts/training_metrics.json
   artifacts/classification_report_churn.txt

üèÜ Best Model Performance:
   Model: Random Forest (Tuned)
   Accuracy: 0.8375
   F1-Score: 0.6208
   ROC-AUC: 0.8612

üíº Business Impact:
   Can identify 266 out of 407 potential churners
   Churn detection rate: 65.36%
   False alarm rate: 11.55%

üöÄ Next Steps:
   1. Test model with app.py
   2. Use web interface for predictions
   3. Deploy for customer retention campaigns
