In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, OneHotEncoder

## Input data

In [2]:
# Load the churn dataset from the CSV located next to this notebook.
df = pd.read_csv(filepath_or_buffer='./Churn_Modelling.csv')

In [3]:
# Preview the first rows of the freshly loaded frame (5 is the pandas default).
df.head(n=5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
# Remove the three columns that are not used anywhere further down the notebook.
df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname'])

In [5]:
# Check the frame again now that the unused columns are gone.
df.head(n=5)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [6]:
# Separate the target column ('Exited') from the feature columns.
y = df['Exited']
X = df.drop(columns=['Exited'])

## Preprocessing data

In [7]:
# One independent LabelEncoder per categorical column, so each keeps its own classes_.
geography_encoder, gender_encoder = LabelEncoder(), LabelEncoder()

In [8]:
# Fit on the untouched string column in `df` instead of on `X['Geography']`.
# This cell overwrites X's column with integer codes, so fitting on X would make a
# re-run refit the encoder on those codes and replace the country names stored in
# `geography_encoder.classes_`. Reading from `df` keeps the cell idempotent.
X['Geography'] = geography_encoder.fit_transform(df['Geography'])

In [9]:
# Fit on the untouched string column in `df` instead of on `X['Gender']`.
# This cell overwrites X's column with integer codes, so fitting on X would make a
# re-run refit the encoder on those codes and replace the original labels stored in
# `gender_encoder.classes_`. Reading from `df` keeps the cell idempotent.
X['Gender'] = gender_encoder.fit_transform(df['Gender'])

In [10]:
# Inspect the feature frame after label-encoding Geography and Gender.
X.head(n=5)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,0,0,42,2,0.0,1,1,1,101348.88
1,608,2,0,41,1,83807.86,1,0,1,112542.58
2,502,0,0,42,8,159660.8,3,1,0,113931.57
3,699,0,0,39,1,0.0,2,0,0,93826.63
4,850,2,0,43,2,125510.82,1,1,1,79084.1


In [11]:
# List the distinct integer codes assigned to Geography, in order of first appearance.
pd.unique(X['Geography'])

array([0, 2, 1])

In [12]:
# `OneHotEncoder(categorical_features=...)` was deprecated in scikit-learn 0.20 and
# removed in 0.22; selecting which columns to encode is now ColumnTransformer's job.
#
# * Only the (label-encoded) 'Geography' column is one-hot encoded.
# * remainder='passthrough' appends every other column unchanged *after* the encoded
#   ones, which is the same column order the old `categorical_features` API produced.
# * sparse_threshold=1.0 keeps the stacked result a scipy sparse matrix, so the
#   existing `.toarray()` call on the result of `fit_transform` keeps working.
one_hot_encoder = ColumnTransformer(
    transformers=[('geography', OneHotEncoder(), ['Geography'])],
    remainder='passthrough',
    sparse_threshold=1.0,
)

In [13]:
# Fit the encoder and transform X in one pass, then densify the sparse result
# so that it can be split and scaled as a regular NumPy array.
encoded_sparse = one_hot_encoder.fit_transform(X)
X_encoded = encoded_sparse.toarray()

In [14]:
# Show the first encoded sample: the one-hot Geography columns lead the row.
X_encoded[0, :]

array([1.0000000e+00, 0.0000000e+00, 0.0000000e+00, 6.1900000e+02,
       0.0000000e+00, 4.2000000e+01, 2.0000000e+00, 0.0000000e+00,
       1.0000000e+00, 1.0000000e+00, 1.0000000e+00, 1.0134888e+05])

In [15]:
# Hold out 30% of the samples for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.3,
    random_state=101,
)

In [16]:
# Min-max scaler with its default target range spelled out explicitly.
scaler = MinMaxScaler(feature_range=(0, 1))

In [17]:
# Learn the per-column min/max from the training split only, then scale that split.
scaled_X_train = scaler.fit(X_train).transform(X_train)

In [18]:
# Print the first training sample before and after scaling, one per line.
print(X_train[0], scaled_X_train[0], sep='\n')

[0.0000000e+00 0.0000000e+00 1.0000000e+00 5.1100000e+02 0.0000000e+00
 2.9000000e+01 9.0000000e+00 0.0000000e+00 2.0000000e+00 0.0000000e+00
 1.0000000e+00 1.4067698e+05]
[0.         0.         1.         0.322      0.         0.14864865
 0.9        0.         0.33333333 0.         1.         0.70347065]


In [19]:
# Apply the scaling learned on the training split to the test split (no refit).
scaled_X_test = scaler.transform(X=X_test)

In [20]:
# Show the first scaled test sample.
scaled_X_test[0, :]

array([0.        , 0.        , 1.        , 0.538     , 1.        ,
       0.45945946, 0.8       , 0.        , 0.33333333, 1.        ,
       1.        , 0.61627849])

In [21]:
# Build the whole network in a single cell.
#
# Creating the model in one cell and appending layers with `model.add(...)` in
# separate cells is not idempotent: re-running any one of those cells silently
# stacks a duplicate layer onto the existing model. Constructing the model from
# a layer list means every run of this cell yields the same fresh architecture.
#
# The input width is read from the scaled training matrix instead of being
# hard-coded, so it follows the encoded feature count automatically.
#
# Architecture: 24 -> 32 -> 12 ReLU units, then a 2-unit softmax that outputs one
# probability per class.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(24, activation='relu', input_dim=scaled_X_train.shape[1]),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(12, activation='relu'),
    tf.keras.layers.Dense(2, activation='softmax'),
])

In [26]:
# Sparse categorical cross-entropy takes integer class labels directly, which
# matches the 2-unit softmax output layer; accuracy is tracked during training.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [27]:
# Train for 50 passes over the scaled training split.
model.fit(x=scaled_X_train, y=y_train, epochs=50)

Epoch 1/50

Epoch 2/50

Epoch 3/50

Epoch 4/50

Epoch 5/50

Epoch 6/50

Epoch 7/50

Epoch 8/50

Epoch 9/50

Epoch 10/50

Epoch 11/50

Epoch 12/50

Epoch 13/50

Epoch 14/50

Epoch 15/50

Epoch 16/50

Epoch 17/50

Epoch 18/50

Epoch 19/50

Epoch 20/50

Epoch 21/50

Epoch 22/50

Epoch 23/50

Epoch 24/50

Epoch 25/50

Epoch 26/50

Epoch 27/50

Epoch 28/50

Epoch 29/50

Epoch 30/50

Epoch 31/50

Epoch 32/50

Epoch 33/50

Epoch 34/50

Epoch 35/50

Epoch 36/50

Epoch 37/50

Epoch 38/50

Epoch 39/50

Epoch 40/50

Epoch 41/50

Epoch 42/50

Epoch 43/50

Epoch 44/50

Epoch 45/50

Epoch 46/50

Epoch 47/50

Epoch 48/50

Epoch 49/50

Epoch 50/50



<tensorflow.python.keras._impl.keras.callbacks.History at 0x121a247f0>

In [28]:
# Per-class probabilities for every sample in the scaled test split.
y_pred = model.predict(x=scaled_X_test)

In [29]:
# Display the raw model output: one row per test sample, one probability per class.
y_pred

array([[0.9673089 , 0.03269113],
       [0.9525623 , 0.04743778],
       [0.42226192, 0.5777381 ],
       ...,
       [0.98815596, 0.01184408],
       [0.42085975, 0.5791402 ],
       [0.56153035, 0.43846962]], dtype=float32)

In [30]:
from sklearn.metrics import classification_report

In [31]:
# Collapse the (n_samples, 2) probability matrix to predicted class labels with a
# single vectorised argmax over the class axis.
#
# The ndim guard matters because this cell rebinds `y_pred`: without it, running
# the cell a second time would take the argmax of each already-scalar label and
# silently turn every prediction into 0. With the guard a re-run is a no-op.
y_pred = np.asarray(y_pred)
if y_pred.ndim == 2:
    y_pred = y_pred.argmax(axis=1)

In [32]:
# Per-class precision / recall / F1 on the held-out test split.
print(classification_report(y_true=y_test, y_pred=y_pred))

             precision    recall  f1-score   support

          0       0.89      0.95      0.92      2378
          1       0.74      0.54      0.63       622

avg / total       0.86      0.87      0.86      3000

