# Importing Dataset and Explanation of Features


In [26]:
# Feature Values:

# CreditScore -> Credit score of the customer.
# Geography -> Country of the Customer.
# Gender -> Sex of the Customer.
# Age -> Age of the customer.
# Tenure -> How many years customer has been in this bank.
# Balance -> Balance of the customer.
# NumOfProducts -> Number of products they use from the bank.
# HasCrCard -> Does customer has credit card or not?
# IsActiveMember -> Is customer active or not? Is it using any facility/product of the bank?
# EstimatedSalary -> Salary of the customer estimated by the bank.

# Target Value:

# Exited -> Is customer exited the bank or not? (1 means exited, 0 means stayed in the bank)

In [27]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

In [28]:
tf.__version__

'2.15.0'

In [29]:
df = pd.read_csv('Churn_Modelling.csv')
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [30]:
X = df.iloc[:, 3:-1].values # Getting every val started from 4t column to last column.
y = df['Exited'].values

In [31]:
print(X)

[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]


In [32]:
print(y)

[1 0 1 ... 1 1 0]


# Data Preprocessing

### Encoding Categorical Data

In [33]:
from sklearn.preprocessing import LabelEncoder

le= LabelEncoder()
X[:,2] = le.fit_transform(X[:,2])

In [34]:
print(X)

[[619 'France' 0 ... 1 1 101348.88]
 [608 'Spain' 0 ... 0 1 112542.58]
 [502 'France' 0 ... 1 0 113931.57]
 ...
 [709 'France' 0 ... 0 1 42085.58]
 [772 'Germany' 1 ... 1 0 92888.52]
 [792 'France' 0 ... 1 0 38190.78]]


In [35]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

ct = ColumnTransformer(transformers = [('encoder', OneHotEncoder(), [1])], remainder = 'passthrough')
X = np.array(ct.fit_transform(X))

In [36]:
print(X)

[[1.0 0.0 0.0 ... 1 1 101348.88]
 [0.0 0.0 1.0 ... 0 1 112542.58]
 [1.0 0.0 0.0 ... 1 0 113931.57]
 ...
 [1.0 0.0 0.0 ... 0 1 42085.58]
 [0.0 1.0 0.0 ... 1 0 92888.52]
 [1.0 0.0 0.0 ... 1 0 38190.78]]


### Splitting the dataset into the Training set and Test Set

In [37]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.2, random_state = 42)


### Feature Scaling

In [38]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train) # Scaling the values before training is a must in Deep Learning.
X_test = sc.fit_transform(X_test)

# Building the ANN

In [39]:
ann = tf.keras.models.Sequential() # Creating Sequential Class.

### Adding the Input Layer and the first hidden layer

In [40]:
ann.add(tf.keras.layers.Dense(units = 6, activation = 'relu')) # Adding the first hidden layer. Unit represents neurons in layer. relu represents rectifier activation function.

### Adding the second hidden layer

In [41]:
ann.add(tf.keras.layers.Dense(units = 6, activation = 'relu'))

### Adding the output layer.

In [42]:
# Because it is a binary outcome, we can use 1 neuron in output layer. If it was classification with 3 values, we could use 3 neurons in our output layer.
# Also, we are using sigmoid activation function we do not want to get only ultimately the predictions, also the probabilities of that the binary outcome is 1.
# When doing non-binary classification, when predicting more than two categories, the activation should not be sigmoid, but soft max.
ann.add(tf.keras.layers.Dense(units = 1, activation = 'sigmoid'))


# Training the ANN

### Compiling the ANN

In [43]:
# Adam is the very performance optimizer that can perform the Stochastic Gradient Descent.
# When you are doing binary clasification, the loss function must always be the "binary_crossentropy". If you are predicting category classification, you should use "category_crossentropy".
ann.compile(optimizer = 'Adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [44]:
# Batch size in here, instead of predicting your real result with y_hat, you can do that with several predictions compared to several real results into a batch.
# Classic number of batch size is mostly chosen 32.
# Epoch = iterations. Few epochs lead underfitting, too many epochs lead overfitting.
ann.fit(X_train, y_train, batch_size = 32, epochs = 50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7fe3c09630a0>

### Making Predictions and Evaluating the model

#### Predicting the result of a single observation

In [45]:
ann.predict(sc.transform([[1,0,0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]]))



array([[0.03246418]], dtype=float32)

In [46]:
# To get a True or False output instead of leaving rate of the customer.
print(ann.predict(sc.transform([[0,1,0, 600, 0, 25, 1, 2500, 1, 0, 1, 50000]])) >0.5)

[[False]]


#### Predicting the Test set result

In [47]:
y_pred = ann.predict(X_test)
y_pred = (y_pred > 0.5)



In [48]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[1548   59]
 [ 226  167]]


0.8575