# **Assignment No. 3**

Given a bank customer's profile, build a neural-network-based classifier
that predicts whether the customer will leave the bank within the next 6 months.

In [None]:
import io

import numpy as np
import pandas as pd
from keras.layers import Dense, Input
from keras.models import Sequential
from keras.utils import set_random_seed
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [None]:
# Colab-only step: open the browser file-upload widget.
# `uploaded` is later indexed by file name ('Churn_Modelling.csv') and the value is
# decoded from bytes, so it maps each uploaded file name to its raw content.
from google.colab import files
uploaded = files.upload()

Saving Churn_Modelling.csv to Churn_Modelling.csv


In [None]:
# Take the uploaded file's raw bytes, decode them as UTF-8 text, then parse the CSV
csv_text = uploaded['Churn_Modelling.csv'].decode('utf-8')
dataset = pd.read_csv(io.StringIO(csv_text))

In [None]:
# Preview the first rows to confirm the columns were parsed as expected
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [None]:
# Data preprocessing
# Select the features and the target by column name instead of by position, so
# the selection fails loudly (KeyError) rather than silently picking the wrong
# columns if the CSV gains, loses or reorders columns.
# RowNumber, CustomerId and Surname are identifier columns and are left out.
FEATURE_COLUMNS = [
    'CreditScore', 'Geography', 'Gender', 'Age', 'Tenure', 'Balance',
    'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary',
]
TARGET_COLUMN = 'Exited'

# The column order above is significant: later cells address Geography as
# column index 1 and Gender as column index 2 of X.
X = dataset[FEATURE_COLUMNS].values  # Selecting necessary features
y = dataset[TARGET_COLUMN].values  # Target variable

In [None]:
# Inspect the selected feature matrix (the repr of a large array is abbreviated with ...)
X

array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

In [None]:
# Integer-encode the two categorical feature columns of X in place:
# column 1 holds Geography (country) and column 2 holds Gender.
labelencoder_X_1 = LabelEncoder()  # Geography
labelencoder_X_2 = LabelEncoder()  # Gender
for col_idx, encoder in ((1, labelencoder_X_1), (2, labelencoder_X_2)):
    X[:, col_idx] = encoder.fit_transform(X[:, col_idx])

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the 'Geography' column (second column, index 1).
# drop='first' omits the indicator column of the first category, which avoids
# the dummy variable trap without a separate positional slice afterwards.
# sparse_threshold=0 makes the transformer always return a dense array.
ct = ColumnTransformer(
    [("Geography", OneHotEncoder(drop='first'), [1])],
    remainder='passthrough',
    sparse_threshold=0,
)

# The encoded Geography columns come first, followed by the untouched features.
# Cast from object dtype to float so later steps receive a purely numeric matrix.
# NOTE: this cell overwrites X with its own transform, so it is not idempotent;
# after running it once, re-run from the feature-selection cell before running
# it again.
X = np.asarray(ct.fit_transform(X), dtype=float)


In [None]:
# Hold out 20% of the rows as a test set; random_state=0 pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Feature scaling. The scaler's statistics are learned from the training split
# only and then reused unchanged on the test split.
# (StandardScaler is already imported in the notebook's first code cell.)
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)


In [None]:
# Building the Neural Network
# NOTE(review): the next cell assigns `classifier = Sequential()` again, so this
# instance is discarded before any layer is added to it; this cell looks redundant.
classifier = Sequential()

In [None]:
# Seed Python, NumPy and the Keras backend before any weights are created so
# that weight initialization and batch shuffling (and therefore the reported
# metrics) are as repeatable as the backend allows.
set_random_seed(0)

# Initialize the neural network
classifier = Sequential()

# Declare the input width from the training matrix instead of hard-coding 11,
# so the model stays in sync with whatever the preprocessing cells produce.
classifier.add(Input(shape=(X_train.shape[1],)))

# Add the first hidden layer with 6 neurons and ReLU activation
classifier.add(Dense(units=6, activation='relu'))


In [None]:
# Second hidden layer: 6 units with ReLU activation
hidden_layer_2 = Dense(units=6, activation='relu')
classifier.add(hidden_layer_2)


In [None]:
# Output layer: a single sigmoid unit for the binary (exited / not exited) target
output_layer = Dense(units=1, activation='sigmoid')
classifier.add(output_layer)

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy reported as a metric
classifier.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the model using the training data with 100 epochs.
# Keep the returned History object: it records the per-epoch loss/accuracy for
# later inspection, and assigning it stops its repr from being echoed as the
# cell's output.
history = classifier.fit(X_train, y_train, epochs=100)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7499 - loss: 0.5944
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8054 - loss: 0.4695
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7994 - loss: 0.4496
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8023 - loss: 0.4248
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7949 - loss: 0.4243
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8046 - loss: 0.4160
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8228 - loss: 0.3902
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8301 - loss: 0.3823
Epoch 9/100
[1m250/250[0m [32

<keras.src.callbacks.history.History at 0x7d1a57834e50>

In [None]:
# Model outputs (sigmoid probabilities) for every row of the test set
churn_probability = classifier.predict(X_test)

# Threshold at 0.5 to turn the probabilities into boolean class predictions
y_pred = churn_probability > 0.5


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


In [None]:
# Evaluate the model on the held-out test set.
# Layout of the confusion matrix (rows = actual class, columns = predicted class):
#   [[True Negative (TN),  False Positive (FP)],
#    [False Negative (FN), True Positive (TP)]]
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# A single print call; sep="\n" puts the accuracy on its own line after the matrix
print(f"Confusion Matrix: \n{cm}", f"Accuracy: {accuracy}", sep="\n")

Confusion Matrix: 
[[1527   68]
 [ 205  200]]
Accuracy: 0.8635
