In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score
from xgboost import XGBClassifier

In [6]:
df = pd.read_csv("/content/Customer_Data.csv")

In [13]:
# Reload the data to start fresh
df = pd.read_csv("/content/Customer_Data.csv")

# Prepare the target variable - convert customer status to binary
df['Churn'] = df['Customer_Status'].apply(lambda x: 1 if x == 'Churned' else 0)

# Remove columns that would cause data leakage or aren't useful for prediction
columns_to_drop = ['Customer_ID', 'Customer_Status', 'Churn_Category', 'Churn_Reason']
df = df.drop(columns=columns_to_drop)

# Handle missing values by removing them
df = df.dropna()

# Convert categorical variables to numeric using one-hot encoding
df = pd.get_dummies(df, drop_first=True)

# Separate features from target
X = df.drop('Churn', axis=1)
y = df['Churn']

# Split data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y  # Ensures balanced split
)

# Standardize features for better model performance
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the XGBoost model with optimized hyperparameters
print("Training Enhanced XGBoost Classifier with optimized parameters...")
print("This may take a moment...\n")

model = XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=42,

    # Optimized hyperparameters for better performance
    n_estimators=200,           # More trees for better learning
    max_depth=6,                # Deeper trees to capture complex patterns
    learning_rate=0.05,         # Slower learning for better generalization
    subsample=0.8,              # Use 80% of data for each tree
    colsample_bytree=0.8,       # Use 80% of features for each tree
    min_child_weight=3,         # Minimum samples in leaf nodes
    gamma=0.1,                  # Minimum loss reduction for split
    scale_pos_weight=2,         # Handle class imbalance (more weight to churners)
    reg_alpha=0.1,              # L1 regularization
    reg_lambda=1.0              # L2 regularization
)

model.fit(X_train_scaled, y_train)
print("‚úì Training completed!\n")

# Generate predictions
y_pred = model.predict(X_test_scaled)
y_proba = model.predict_proba(X_test_scaled)[:, 1]

# Display model performance metrics
print("="*60)
print("MODEL PERFORMANCE EVALUATION")
print("="*60)

conf_matrix = confusion_matrix(y_test, y_pred)
print("\nüìä Confusion Matrix:")
print(f"    Predicted: No Churn | Predicted: Churn")
print(f"    ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ")
print(f"Actually No Churn:  {conf_matrix[0][0]:>4}      |      {conf_matrix[0][1]:>4}")
print(f"Actually Churn:     {conf_matrix[1][0]:>4}      |      {conf_matrix[1][1]:>4}")

print("\nüìà Classification Report:")
print(classification_report(y_test, y_pred, target_names=['No Churn', 'Churn']))

roc_score = roc_auc_score(y_test, y_proba)
print(f"üéØ ROC AUC Score: {roc_score:.4f}")
print(f"   (Score ranges from 0.5 to 1.0, where 1.0 is perfect)")

# Calculate additional metrics for insights
accuracy = (conf_matrix[0][0] + conf_matrix[1][1]) / conf_matrix.sum()
sensitivity = conf_matrix[1][1] / (conf_matrix[1][0] + conf_matrix[1][1])  # Recall for churners
specificity = conf_matrix[0][0] / (conf_matrix[0][0] + conf_matrix[0][1])  # Recall for non-churners

print(f"\n‚úì Overall Accuracy: {accuracy:.2%}")
print(f"‚úì Correctly Predicted: {conf_matrix[0][0] + conf_matrix[1][1]} out of {conf_matrix.sum()} cases")
print(f"‚úì Churn Detection Rate (Sensitivity): {sensitivity:.2%}")
print(f"‚úì Non-Churn Detection Rate (Specificity): {specificity:.2%}")

# Show top 10 most important features
print("\n" + "="*60)
print("üîç TOP 10 MOST IMPORTANT FEATURES FOR CHURN PREDICTION")
print("="*60)
feature_importance = pd.DataFrame({
    'Feature': X.columns,
    'Importance': model.feature_importances_
}).sort_values('Importance', ascending=False)

for idx, row in feature_importance.head(10).iterrows():
    bar_length = int(row['Importance'] * 50)
    bar = '‚ñà' * bar_length
    print(f"{row['Feature'][:35]:35} {bar} {row['Importance']:.4f}")

# Generate predictions for the entire dataset
print("\n" + "="*60)
print("Generating predictions for all customers...")
X_full_scaled = scaler.transform(X)
df['Predicted_Churn'] = model.predict(X_full_scaled)
df['Churn_Probability'] = model.predict_proba(X_full_scaled)[:, 1]

# Risk segmentation
df['Risk_Segment'] = pd.cut(
    df['Churn_Probability'],
    bins=[0, 0.3, 0.6, 1.0],
    labels=['Low Risk', 'Medium Risk', 'High Risk']
)

# Save results
output_file = "Churn_Prediction_Output.csv"
df.to_csv(output_file, index=False)
print(f"‚úÖ Success! Predictions saved to '{output_file}'")
print(f"   Total records: {len(df)}")
print(f"   Predicted churners: {df['Predicted_Churn'].sum()} ({df['Predicted_Churn'].sum()/len(df):.1%})")
print(f"\nüìä Risk Segmentation:")
print(df['Risk_Segment'].value_counts().sort_index())
print("="*60)

Training Enhanced XGBoost Classifier with optimized parameters...
This may take a moment...



Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


‚úì Training completed!

MODEL PERFORMANCE EVALUATION

üìä Confusion Matrix:
    Predicted: No Churn | Predicted: Churn
    ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
Actually No Churn:   233      |        40
Actually Churn:       46      |        80

üìà Classification Report:
              precision    recall  f1-score   support

    No Churn       0.84      0.85      0.84       273
       Churn       0.67      0.63      0.65       126

    accuracy                           0.78       399
   macro avg       0.75      0.74      0.75       399
weighted avg       0.78      0.78      0.78       399

üéØ ROC AUC Score: 0.8263
   (Score ranges from 0.5 to 1.0, where 1.0 is perfect)

‚úì Overall Accuracy: 78.45%
‚úì Correctly Predicted: 313 out of 399 cases
‚úì Churn Detection Rate (Sensitivity): 63.49%
‚úì Non-Churn Detection Rate (Specificity): 85.35%

üîç TOP 10 MOST IMPORTANT FEATURES FOR CHURN PREDICTION
Value_De