In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer 
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score

<h2>Data Preprocessing</h2>

In [2]:
df = pd.read_csv('Churn_Modelling.csv')
df_infos = df.iloc[:, 3 : -1].values
df_results = df.iloc[:, -1].values

Categorical Data

In [3]:
#Encoding the Gender
le = LabelEncoder()
df_infos[:, 2] = le.fit_transform(df_infos[:, 2])
#Encoding the Geography
ct = ColumnTransformer(transformers=[('encoder',OneHotEncoder(),[1])],remainder='passthrough')
df_infos = np.array(ct.fit_transform(df_infos))

In [4]:
print(df_infos)

[[1.0 0.0 0.0 ... 1 1 101348.88]
 [0.0 0.0 1.0 ... 0 1 112542.58]
 [1.0 0.0 0.0 ... 1 0 113931.57]
 ...
 [1.0 0.0 0.0 ... 0 1 42085.58]
 [0.0 1.0 0.0 ... 1 0 92888.52]
 [1.0 0.0 0.0 ... 1 0 38190.78]]


Spliting in training and test

In [5]:
df_results_train , df_results_test , df_infos_train , df_infos_test = train_test_split(df_results, df_infos , test_size= 0.2, random_state= 1 )

Feature Scalling

In [6]:
sc = StandardScaler()
df_infos_train = sc.fit_transform(df_infos_train)
df_infos_test = sc.fit_transform(df_infos_test)

<h2>Building the ANN</h2>

In [7]:
ann = tf.keras.models.Sequential()

Ipunt and first hidden layer

In [8]:
ann.add(tf.keras.layers.Dense(units=6,activation='relu'))

Second hidden layer

In [9]:
ann.add(tf.keras.layers.Dense(units=6,activation='relu'))

Output layer

In [10]:
ann.add(tf.keras.layers.Dense(units=1,activation='sigmoid'))

<h2>Training the ANN</h2>

In [11]:
ann.compile(optimizer = 'adam',loss = 'binary_crossentropy',metrics = ['accuracy'])

In [16]:
ann.fit(df_infos_train,df_results_train, batch_size = 32, epochs = 150)

Epoch 1/150
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 773us/step - accuracy: 0.8642 - loss: 0.3282
Epoch 2/150
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 692us/step - accuracy: 0.8598 - loss: 0.3323
Epoch 3/150
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 761us/step - accuracy: 0.8559 - loss: 0.3426
Epoch 4/150
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 705us/step - accuracy: 0.8692 - loss: 0.3286
Epoch 5/150
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 676us/step - accuracy: 0.8590 - loss: 0.3405
Epoch 6/150
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 662us/step - accuracy: 0.8591 - loss: 0.3397
Epoch 7/150
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 684us/step - accuracy: 0.8637 - loss: 0.3314
Epoch 8/150
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 747us/step - accuracy: 0.8676 - loss: 0.3170
Epoch 9/150
[1m

<keras.src.callbacks.history.History at 0x2ad86052710>

<h2>predicting the ANN</h2>

In [17]:
print(ann.predict(sc.transform([[1, 0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]])) > 0.5)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[[False]]


In [18]:
pred_results = ann.predict(df_infos_test)
pred_results = (pred_results > 0.5)
print(np.concatenate((pred_results.reshape(len(pred_results),1), df_results_test.reshape(len(df_results_test),1)),1))

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 562us/step
[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


In [19]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(df_results_test, pred_results)
print(cm)
accuracy_score(df_results_test, pred_results)

[[1523   62]
 [ 213  202]]


0.8625