# Bank churn prediction using TensorFlow simple model

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from matplotlib import pyplot as plt
import seaborn as sn
%matplotlib inline

Importing Data

In [2]:
df = pd.read_csv('Churn_Modelling.csv')
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


# Exploratory Data Analysis + Feature engineering(scaling + encoding)

In [3]:
df.drop(['RowNumber', 'CustomerId', 'Surname'], axis='columns', inplace=True)

In [4]:
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [5]:
df.describe()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,650.5288,38.9218,5.0128,76485.889288,1.5302,0.7055,0.5151,100090.239881,0.2037
std,96.653299,10.487806,2.892174,62397.405202,0.581654,0.45584,0.499797,57510.492818,0.402769
min,350.0,18.0,0.0,0.0,1.0,0.0,0.0,11.58,0.0
25%,584.0,32.0,3.0,0.0,1.0,0.0,0.0,51002.11,0.0
50%,652.0,37.0,5.0,97198.54,1.0,1.0,1.0,100193.915,0.0
75%,718.0,44.0,7.0,127644.24,2.0,1.0,1.0,149388.2475,0.0
max,850.0,92.0,10.0,250898.09,4.0,1.0,1.0,199992.48,1.0


In [6]:
df.describe(include='object')

Unnamed: 0,Geography,Gender
count,10000,10000
unique,3,2
top,France,Male
freq,5014,5457


In [7]:
def print_unique_col_values(df):
       for column in df:
            if df[column].dtypes=='object':
                print(f'{column}: {df[column].unique()}') 

In [8]:
print_unique_col_values(df)

Geography: ['France' 'Spain' 'Germany']
Gender: ['Female' 'Male']


In [9]:
df.Gender.replace({'Female': 1, 'Male': 0}, inplace=True)

In [10]:
df2 = pd.get_dummies(df)

In [11]:
df2.drop("Geography_France",axis='columns',inplace=True)

In [12]:
df2.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_Germany,Geography_Spain
0,619,1,42,2,0.0,1,1,1,101348.88,1,False,False
1,608,1,41,1,83807.86,1,0,1,112542.58,0,False,True
2,502,1,42,8,159660.8,3,1,0,113931.57,1,False,False
3,699,1,39,1,0.0,2,0,0,93826.63,0,False,False
4,850,1,43,2,125510.82,1,1,1,79084.1,0,False,True


In [13]:
from sklearn.preprocessing import MinMaxScaler

In [14]:
cols_to_scale=['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'EstimatedSalary']
scaler = MinMaxScaler()
df2[cols_to_scale] = scaler.fit_transform(df2[cols_to_scale])

In [15]:
df2.describe()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,0.601058,0.4543,0.282727,0.50128,0.304848,0.176733,0.7055,0.5151,0.500441,0.2037
std,0.193307,0.497932,0.141727,0.289217,0.248696,0.193885,0.45584,0.499797,0.28758,0.402769
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.468,0.0,0.189189,0.3,0.0,0.0,0.0,0.0,0.254977,0.0
50%,0.604,0.0,0.256757,0.5,0.387402,0.0,1.0,1.0,0.50096,0.0
75%,0.736,1.0,0.351351,0.7,0.508749,0.333333,1.0,1.0,0.746955,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [16]:
x = df2.drop('Exited', axis='columns')
y = df2.Exited

In [17]:
from sklearn.model_selection import train_test_split
x_tr, x_te, y_tr, y_te = train_test_split(x,y,test_size=0.2)

In [18]:
x_tr.shape, y_tr.shape,x_te.shape, y_te.shape

((8000, 11), (8000,), (2000, 11), (2000,))

In [19]:
x_tr

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_Germany,Geography_Spain
8178,0.714,1,0.378378,0.7,0.508082,0.333333,1,1,0.730070,False,False
4961,0.678,0,0.364865,0.0,0.518819,0.333333,1,0,0.754296,True,False
9759,0.554,1,0.121622,0.2,0.000000,0.333333,1,0,0.627257,False,False
8352,0.474,1,0.418919,0.7,0.619351,0.333333,1,0,0.066489,True,False
6299,0.480,0,0.081081,0.7,0.503916,0.000000,1,0,0.293876,False,False
...,...,...,...,...,...,...,...,...,...,...,...
5622,0.640,0,0.270270,1.0,0.356388,0.000000,0,0,0.721388,False,False
142,0.664,1,0.337838,0.5,0.501606,0.000000,1,1,0.966626,False,False
8898,1.000,0,0.297297,0.6,0.497366,0.000000,1,0,0.328034,False,False
6315,0.418,0,0.662162,0.9,0.501874,0.000000,1,0,0.879581,False,True


In [20]:
model = keras.Sequential([
    keras.layers.Dense(11, input_shape=(11,), activation='relu'),
    keras.layers.Dense(5, activation='relu'),
    keras.layers.Dense(1,activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy']
)

model.fit(x_tr, y_tr, epochs=100)

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).

In [None]:
model.evaluate(x_te,y_te)

In [None]:
yp = model.predict(x_te)
yp

In [None]:
y_pred = []
for element in yp:
    if element > 0.5:
        y_pred.append(1)
    else:
        y_pred.append(0)

In [None]:
from sklearn.metrics import confusion_matrix , classification_report

print(classification_report(y_te,y_pred))

In [None]:
cm = tf.math.confusion_matrix(labels=y_te,predictions=y_pred)

plt.figure(figsize = (10,7))
sn.heatmap(cm, annot=True, fmt='d')
plt.xlabel('Predicted')
plt.ylabel('Truth')