In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc, roc_auc_score
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the processed dataset
df_clean = pd.read_csv('churn_processed.csv')

# Separate features and target
X = df_clean.drop('Exited', axis=1)
y = df_clean['Exited']

# Split the data into training and test sets (70% train, 30% test) - same as previous tasks
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Feature scaling - crucial for neural networks
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [3]:
# Define the parameter grid for Neural Network (MLP)
param_grid = {
    'hidden_layer_sizes': [(10,), (20,), (50,), (100,), (10, 10), (20, 10)],  # Network architecture
    'activation': ['relu', 'tanh'],                                         # Activation function
    'alpha': [0.0001, 0.001, 0.01],                                         # L2 regularization
    'learning_rate_init': [0.001, 0.01],                                    # Initial learning rate
    'max_iter': [200, 500]                                                  # Maximum iterations
}

# Explanation of hyperparameters:
# Hyperparameter search explanation
# hidden_layer_sizes: Defines the architecture (neurons in each hidden layer
# activation: Activation function for hidden layers (relu = Rectified Linear Unit, tanh = Hyperbolic Tangent)
# alpha: L2 regularization parameter to prevent overfitting
# learning_rate_init: Initial learning rate for weight updates
# max_iter: Maximum number of iterations for convergence

In [4]:
# Create the GridSearchCV object for neural network
grid_search_nn = GridSearchCV(
    estimator=MLPClassifier(random_state=42, early_stopping=True, validation_fraction=0.1, verbose=True),
    param_grid=param_grid,
    cv=3,                    # 3-fold cross-validation (reduced from 5 since NN is computationally intensive)
    scoring='accuracy',      # Use accuracy as the scoring metric
    n_jobs=-1,               # Use all available CPU cores
    verbose=1                # Show progress
)

In [5]:
# Perform the grid search
print("\nStarting Grid Search for Neural Network optimization...")
grid_search_nn.fit(X_train_scaled, y_train)

# Get the best parameters and best estimator
print("\nBest Parameters:")
print(grid_search_nn.best_params_)


Starting Grid Search for Neural Network optimization...
Fitting 3 folds for each of 144 candidates, totalling 432 fits
Iteration 1, loss = 0.40487021
Validation score: 0.952857
Iteration 2, loss = 0.13453731
Validation score: 0.957143
Iteration 3, loss = 0.11148727
Validation score: 0.957143
Iteration 4, loss = 0.10754915
Validation score: 0.955714
Iteration 5, loss = 0.10670289
Validation score: 0.955714
Iteration 6, loss = 0.10583234
Validation score: 0.955714
Iteration 7, loss = 0.10529085
Validation score: 0.955714
Iteration 8, loss = 0.10462780
Validation score: 0.955714
Iteration 9, loss = 0.10357457
Validation score: 0.955714
Iteration 10, loss = 0.10323840
Validation score: 0.955714
Iteration 11, loss = 0.10287716
Validation score: 0.955714
Iteration 12, loss = 0.10249774
Validation score: 0.955714
Iteration 13, loss = 0.10185335
Validation score: 0.955714
Validation score did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.

Best Parameters:
{'activatio

In [6]:
# Use the best model
nn_best = grid_search_nn.best_estimator_

# Evaluate on training and test sets
y_train_pred_nn = nn_best.predict(X_train_scaled)
y_test_pred_nn = nn_best.predict(X_test_scaled)

train_accuracy_nn = accuracy_score(y_train, y_train_pred_nn)
test_accuracy_nn = accuracy_score(y_test, y_test_pred_nn)

print("\nNeural Network Performance:")
print(f"Training Accuracy: {train_accuracy_nn:.4f}")
print(f"Test Accuracy: {test_accuracy_nn:.4f}")


Neural Network Performance:
Training Accuracy: 0.9704
Test Accuracy: 0.9660


In [7]:
# Check for overfitting
print("\nOverfitting Check:")
print(f"Accuracy difference (Train - Test): {train_accuracy_nn - test_accuracy_nn:.4f}")
if train_accuracy_nn - test_accuracy_nn > 0.05:
    print("Possible overfitting: Training accuracy is significantly higher than test accuracy")
else:
    print("No strong evidence of overfitting: Training and test accuracies are similar")


Overfitting Check:
Accuracy difference (Train - Test): 0.0044
No strong evidence of overfitting: Training and test accuracies are similar


In [8]:
# Check convergence
print("\nConvergence Check:")
print(f"Best model iterations: {nn_best.n_iter_}")
print(f"Maximum iterations allowed: {nn_best.max_iter}")
if nn_best.n_iter_ < nn_best.max_iter:
    print("The model converged before reaching maximum iterations")
else:
    print("The model reached maximum iterations without convergence")


Convergence Check:
Best model iterations: 13
Maximum iterations allowed: 200
The model converged before reaching maximum iterations


In [9]:
# Display model details
print("\nBest Neural Network architecture:")
print(f"Hidden layer sizes: {nn_best.hidden_layer_sizes}")
print(f"Activation function: {nn_best.activation}")
print(f"Alpha (L2 regularization): {nn_best.alpha}")
print(f"Learning rate: {nn_best.learning_rate_init}")
print(f"Solver: {nn_best.solver}")


Best Neural Network architecture:
Hidden layer sizes: (10,)
Activation function: tanh
Alpha (L2 regularization): 0.0001
Learning rate: 0.01
Solver: adam


In [10]:
# Get the top 5 features from the best decision tree model (Task 2)
top_dt_features = ['ComplaintsLodged', 'Age', 'NumOfProducts']

print("\nTop features from best decision tree model:")
for feature in top_dt_features:
    print(f"- {feature}")

# Create reduced dataset with only these features
X_train_reduced = X_train[top_dt_features]
X_test_reduced = X_test[top_dt_features]

# Apply scaling to the reduced feature set
X_train_reduced_scaled = scaler.fit_transform(X_train_reduced)
X_test_reduced_scaled = scaler.transform(X_test_reduced)

print("\nReduced dataset prepared:")
print(f"Training set shape: {X_train_reduced_scaled.shape}")
print(f"Test set shape: {X_test_reduced_scaled.shape}")


Top features from best decision tree model:
- ComplaintsLodged
- Age
- NumOfProducts

Reduced dataset prepared:
Training set shape: (7000, 3)
Test set shape: (3000, 3)


In [22]:
# Define parameter grid for reduced neural network
param_grid_reduced = {
    'hidden_layer_sizes': [(5,), (10,), (20,), (5, 5), (10, 5)],  # Smaller architectures for fewer inputs
    'activation': ['relu', 'tanh'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate_init': [0.001, 0.01],
    'max_iter': [200, 500]
}


In [23]:
# Create the GridSearchCV object for reduced neural network
grid_search_nn_reduced = GridSearchCV(
    estimator=MLPClassifier(random_state=42, early_stopping=True, validation_fraction=0.1),
    param_grid=param_grid_reduced,
    cv=3,
    scoring='accuracy',
    n_jobs=-1,
    verbose=1
)

In [None]:
# Perform the grid search on reduced feature set
print("\nStarting Grid Search for reduced Neural Network...")
grid_search_nn_reduced.fit(X_train_reduced_scaled, y_train)

# Get the best parameters and best estimator
print("\nBest Parameters for reduced Neural Network:")
print(grid_search_nn_reduced.best_params_)


Starting Grid Search for reduced Neural Network...
Fitting 3 folds for each of 120 candidates, totalling 360 fits


In [None]:
# Use the best reduced model
nn_reduced_best = grid_search_nn_reduced.best_estimator_

# Evaluate on training and test sets
y_train_pred_nn_reduced = nn_reduced_best.predict(X_train_reduced_scaled)
y_test_pred_nn_reduced = nn_reduced_best.predict(X_test_reduced_scaled)

train_accuracy_nn_reduced = accuracy_score(y_train, y_train_pred_nn_reduced)
test_accuracy_nn_reduced = accuracy_score(y_test, y_test_pred_nn_reduced)

print("\nReduced Neural Network Performance:")
print(f"Training Accuracy: {train_accuracy_nn_reduced:.4f}")
print(f"Test Accuracy: {test_accuracy_nn_reduced:.4f}")


In [None]:
# Check for overfitting in reduced model
print("\nOverfitting Check for reduced model:")
print(f"Accuracy difference (Train - Test): {train_accuracy_nn_reduced - test_accuracy_nn_reduced:.4f}")
if train_accuracy_nn_reduced - test_accuracy_nn_reduced > 0.05:
    print("Possible overfitting: Training accuracy is significantly higher than test accuracy")
else:
    print("No strong evidence of overfitting: Training and test accuracies are similar")

In [None]:
# Check convergence of reduced model
print("\nConvergence Check for reduced model:")
print(f"Reduced model iterations: {nn_reduced_best.n_iter_}")
print(f"Maximum iterations allowed: {nn_reduced_best.max_iter}")
if nn_reduced_best.n_iter_ < nn_reduced_best.max_iter:
    print("The reduced model converged before reaching maximum iterations")
else:
    print("The reduced model reached maximum iterations without convergence")

# Display reduced model details
print("\nReduced Neural Network architecture:")
print(f"Hidden layer sizes: {nn_reduced_best.hidden_layer_sizes}")
print(f"Activation function: {nn_reduced_best.activation}")
print(f"Alpha (L2 regularization): {nn_reduced_best.alpha}")
print(f"Learning rate: {nn_reduced_best.learning_rate_init}")