In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
# Step 1: Read the dataset
data = pd.read_csv('Churn_Modelling.csv')  # Assuming you have downloaded the dataset

In [3]:
data

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [4]:
# Step 2: Distinguish the feature and target set and divide the data set into training and test sets.
X = data.drop(['RowNumber', 'CustomerId', 'Surname', 'Exited'], axis=1)             #1 2 3 last
y = data['Exited']

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Step 3: Normalize the train and test data.
scaler = StandardScaler()
X_train_numeric = X_train.drop(['Geography', 'Gender'], axis=1)
X_test_numeric = X_test.drop(['Geography', 'Gender'], axis=1)
X_train_numeric = scaler.fit_transform(X_train_numeric)
X_test_numeric = scaler.transform(X_test_numeric)

In [7]:
# One-hot encoding for categorical variables
encoder = OneHotEncoder(sparse=False, drop='first')
X_train_categorical = encoder.fit_transform(X_train[['Geography', 'Gender']])
X_test_categorical = encoder.transform(X_test[['Geography', 'Gender']])



In [8]:
# Concatenate the numeric and encoded categorical features
X_train_processed = pd.concat([pd.DataFrame(X_train_numeric), pd.DataFrame(X_train_categorical)], axis=1)
X_test_processed = pd.concat([pd.DataFrame(X_test_numeric), pd.DataFrame(X_test_categorical)], axis=1)

In [9]:
# Convert labels to numpy arrays
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

In [10]:
# Step 4: Initialize and build the model.
model = Sequential()
model.add(Dense(64, activation='relu', input_dim=X_train_processed.shape[1]))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile('adam','binary_crossentropy', metrics=['accuracy'])
model.fit(X_train_processed, y_train, epochs=10, batch_size=32, validation_data=(X_test_processed, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x2966886c940>

In [13]:
# Step 5: Print the accuracy score and confusion matrix
y_pred = (model.predict(X_test_processed) > 0.5).astype("int32") 

accuracy = accuracy_score(y_test, y_pred)
confusion = confusion_matrix(y_test, y_pred)



In [14]:
print(f"Accuracy Score: {accuracy}")
print("Confusion Matrix:")
print(confusion)

Accuracy Score: 0.862
Confusion Matrix:
[[1534   73]
 [ 203  190]]


In [15]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.88      0.95      0.92      1607
           1       0.72      0.48      0.58       393

    accuracy                           0.86      2000
   macro avg       0.80      0.72      0.75      2000
weighted avg       0.85      0.86      0.85      2000



In [16]:
data

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1
