In [33]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.neural_network import MLPClassifier

In [35]:
# Load the churn dataset from a CSV file located relative to the working directory.
path = "churn_modelling.csv"
df = pd.read_csv(path)

# Quick structural overview: dimensions, column names, then the first rows.
print("Dataset shape:", df.shape)
print("\nColumns:", list(df.columns))
df.head()

Dataset shape: (10000, 14)

Columns: ['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Exited']


Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [42]:
# Build the feature matrix X and the target vector y from the raw frame.
# The row counter, customer id and surname columns are dropped together with the
# target itself; errors="ignore" means a listed column that is absent is skipped
# instead of raising.
X = df.drop(
    columns=["RowNumber", "CustomerId", "Surname", "Exited"],
    errors="ignore",
)
y = df["Exited"]  # target column
X.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,699,France,Female,39,1,0.0,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1


In [46]:
# Encode the categorical columns so the model receives only numeric/boolean features.
# Work on a copy: X keeps the raw values and this cell can be re-run safely.
X_processed = X.copy()

if 'Gender' in X_processed.columns:
    # Series.map() silently turns every value that is missing from the mapping into NaN.
    # Validate the result here so an unexpected category (or a missing value) is reported
    # immediately with a clear message instead of surfacing later as a NaN error at fit time.
    gender_codes = {'Male': 1, 'Female': 0}
    gender_encoded = X_processed['Gender'].map(gender_codes)
    unmapped_mask = gender_encoded.isna()
    if unmapped_mask.any():
        unexpected = X_processed.loc[unmapped_mask, 'Gender'].unique().tolist()
        raise ValueError(
            f"Gender contains values that cannot be encoded with {gender_codes}: {unexpected}"
        )
    X_processed['Gender'] = gender_encoded

if 'Geography' in X_processed.columns:
    # One-hot encode Geography; drop_first=True omits the first category so the
    # remaining indicator columns are not perfectly collinear.
    geo_dummies = pd.get_dummies(X_processed['Geography'], prefix='Geo', drop_first=True)
    X_processed = pd.concat([X_processed.drop(columns=['Geography']), geo_dummies], axis=1)

print("Processed feature columns:", X_processed.columns.tolist())
X_processed.head()

Processed feature columns: ['CreditScore', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Geo_Germany', 'Geo_Spain']


Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geo_Germany,Geo_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,False,False
1,608,0,41,1,83807.86,1,0,1,112542.58,False,True
2,502,0,42,8,159660.8,3,1,0,113931.57,False,False
3,699,0,39,1,0.0,2,0,0,93826.63,False,False
4,850,0,43,2,125510.82,1,1,1,79084.1,False,True


In [52]:
# Hold out 20% of the rows for testing. stratify=y is passed so the split is
# stratified on the target, and random_state fixes the shuffle for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X_processed,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

print("Training set shape:", X_train.shape)
print("Testing set shape:", X_test.shape)


Training set shape: (8000, 11)
Testing set shape: (2000, 11)


In [58]:
# Standardise the numeric features with statistics learned from the training split only
# (fit on train, apply to test) so no information from the test set leaks into training.
#
# This cell overwrites X_train / X_test, so it must be idempotent. Without the restore
# step below, a second execution would refit the scaler on already-standardised training
# data (mean ~0, std ~1), leaving `scaler` as a near-identity transform. If a previous
# attempt had only updated X_train, X_test would then silently stay in raw units while
# the model is trained on scaled units.
# Restoring the unscaled rows from X_processed (matched by index label) makes every run
# start from the same raw values.
assert X_processed.index.is_unique, "row labels must be unique to restore the raw splits"
X_train = X_processed.loc[X_train.index].copy()
X_test = X_processed.loc[X_test.index].copy()

scaler = StandardScaler()
# Only numeric dtypes are scaled; boolean indicator columns are not selected by np.number.
num_cols = X_train.select_dtypes(include=[np.number]).columns.tolist()
X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
X_test[num_cols] = scaler.transform(X_test[num_cols])

# Sanity check on the training split: means should be ~0 and standard deviations ~1.
print("Mean of scaled traininf features(approx 0):")
print(X_train[num_cols].mean().round(3))
print(" standard deviation(approx 1):")
print(X_train[num_cols].std().round(3))

Mean of scaled traininf features(approx 0):
CreditScore        0.0
Gender             0.0
Age                0.0
Tenure            -0.0
Balance            0.0
NumOfProducts     -0.0
HasCrCard          0.0
IsActiveMember     0.0
EstimatedSalary   -0.0
dtype: float64
 standard deviation(approx 1):
CreditScore        1.0
Gender             1.0
Age                1.0
Tenure             1.0
Balance            1.0
NumOfProducts      1.0
HasCrCard          1.0
IsActiveMember     1.0
EstimatedSalary    1.0
dtype: float64


In [66]:
# Multi-layer perceptron classifier for the churn target.
mlp = MLPClassifier(
    # Architecture: three hidden layers of decreasing width with ReLU activations.
    hidden_layer_sizes=(64, 32, 16),
    activation="relu",
    # Optimisation: Adam on mini-batches of 64 with a small L2 penalty (alpha).
    solver="adam",
    learning_rate_init=0.001,
    batch_size=64,
    alpha=1e-4,
    max_iter=200,
    # Early stopping: 10% of the training data is set aside for validation and training
    # halts once the validation score has not improved for 10 consecutive iterations.
    early_stopping=True,
    validation_fraction=0.1,
    n_iter_no_change=10,
    # Reproducibility and per-iteration progress logging.
    random_state=42,
    verbose=True,
)

# Fit on the training split; as the last expression, the fitted estimator is displayed.
mlp.fit(X_train, y_train)

Iteration 1, loss = 0.51298925
Validation score: 0.813750
Iteration 2, loss = 0.42146846
Validation score: 0.840000
Iteration 3, loss = 0.39445369
Validation score: 0.842500
Iteration 4, loss = 0.37218501
Validation score: 0.850000
Iteration 5, loss = 0.35436282
Validation score: 0.850000
Iteration 6, loss = 0.34469342
Validation score: 0.852500
Iteration 7, loss = 0.33831919
Validation score: 0.846250
Iteration 8, loss = 0.33639606
Validation score: 0.852500
Iteration 9, loss = 0.33105675
Validation score: 0.847500
Iteration 10, loss = 0.32876211
Validation score: 0.851250
Iteration 11, loss = 0.32503444
Validation score: 0.850000
Iteration 12, loss = 0.32284118
Validation score: 0.853750
Iteration 13, loss = 0.32041726
Validation score: 0.851250
Iteration 14, loss = 0.31701689
Validation score: 0.858750
Iteration 15, loss = 0.31600989
Validation score: 0.852500
Iteration 16, loss = 0.31428640
Validation score: 0.850000
Iteration 17, loss = 0.31019505
Validation score: 0.848750
Iterat

In [70]:
# Evaluate the fitted network on the held-out test split.
# NOTE(review): the recorded output of this cell (accuracy 0.3230, most rows predicted
# as class 1) is far below the ~0.85 validation score logged during training. That gap
# suggests X_test was not on the same scale as the training data when this was executed
# -- confirm by re-running the notebook top to bottom on a fresh kernel.
y_pred = mlp.predict(X_test)

# Overall accuracy plus the confusion matrix.
accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

print(f"\n test Accuracy:{accuracy:.4f}:")
print("\nconfusion Matrix(row=True Class,cols=Predicted class):")
print(cm)

# Per-class precision / recall / F1, reported with four decimals.
print("\nClassification Report:")
print(classification_report(y_test, y_pred, digits=4))


 test Accuracy:0.3230:

confusion Matrix(row=True Class,cols=Predicted class):
[[ 309 1284]
 [  70  337]]

Classification Report:
              precision    recall  f1-score   support

           0     0.8153    0.1940    0.3134      1593
           1     0.2079    0.8280    0.3323       407

    accuracy                         0.3230      2000
   macro avg     0.5116    0.5110    0.3229      2000
weighted avg     0.6917    0.3230    0.3172      2000

