In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, OneHotEncoder

## Input data

In [2]:
df = pd.read_csv('./Churn_Modelling.csv')

In [3]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
df = df.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)

In [5]:
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [6]:
X, y = df.drop(['Exited'], axis=1), df['Exited']

## Preprocessing data

In [7]:
geography_encoder = LabelEncoder()
gender_encoder = LabelEncoder()

In [8]:
X['Geography'] = geography_encoder.fit_transform(X['Geography'])

In [9]:
X['Gender'] = gender_encoder.fit_transform(X['Gender'])

In [10]:
X.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,0,0,42,2,0.0,1,1,1,101348.88
1,608,2,0,41,1,83807.86,1,0,1,112542.58
2,502,0,0,42,8,159660.8,3,1,0,113931.57
3,699,0,0,39,1,0.0,2,0,0,93826.63
4,850,2,0,43,2,125510.82,1,1,1,79084.1


In [11]:
X['Geography'].unique()

array([0, 2, 1])

In [12]:
one_hot_encoder = OneHotEncoder(categorical_features=[1])

In [13]:
X_encoded = one_hot_encoder.fit_transform(X).toarray()

In [14]:
X_encoded[0]

array([1.0000000e+00, 0.0000000e+00, 0.0000000e+00, 6.1900000e+02,
       0.0000000e+00, 4.2000000e+01, 2.0000000e+00, 0.0000000e+00,
       1.0000000e+00, 1.0000000e+00, 1.0000000e+00, 1.0134888e+05])

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.3, random_state=101)

In [16]:
scaler = MinMaxScaler()

In [17]:
scaled_X_train = scaler.fit_transform(X_train)

In [18]:
print(X_train[0])
print(scaled_X_train[0])

[0.0000000e+00 0.0000000e+00 1.0000000e+00 5.1100000e+02 0.0000000e+00
 2.9000000e+01 9.0000000e+00 0.0000000e+00 2.0000000e+00 0.0000000e+00
 1.0000000e+00 1.4067698e+05]
[0.         0.         1.         0.322      0.         0.14864865
 0.9        0.         0.33333333 0.         1.         0.70347065]


In [19]:
scaled_X_test = scaler.transform(X_test)

In [20]:
scaled_X_test[0]

array([0.        , 0.        , 1.        , 0.538     , 1.        ,
       0.45945946, 0.8       , 0.        , 0.33333333, 1.        ,
       1.        , 0.61627849])

## Model

In [21]:
def get_classifier():
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(24, activation='relu', input_dim=12))
    model.add(tf.keras.layers.Dropout(0.1))
    model.add(tf.keras.layers.Dense(32, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.1))
    model.add(tf.keras.layers.Dense(12, activation='relu'))
    model.add(tf.keras.layers.Dense(2, activation='softmax'))
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

In [22]:
model = get_classifier()

In [23]:
model.fit(scaled_X_train, y_train, epochs=50)

Epoch 1/50

Epoch 2/50

Epoch 3/50

Epoch 4/50

Epoch 5/50

Epoch 6/50

Epoch 7/50

Epoch 8/50

Epoch 9/50

Epoch 10/50

Epoch 11/50

Epoch 12/50

Epoch 13/50

Epoch 14/50

Epoch 15/50

Epoch 16/50

Epoch 17/50

Epoch 18/50

Epoch 19/50

Epoch 20/50

Epoch 21/50

Epoch 22/50

Epoch 23/50

Epoch 24/50

Epoch 25/50

Epoch 26/50

Epoch 27/50

Epoch 28/50

Epoch 29/50

Epoch 30/50

Epoch 31/50

Epoch 32/50

Epoch 33/50

Epoch 34/50

Epoch 35/50

Epoch 36/50

Epoch 37/50

Epoch 38/50

Epoch 39/50

Epoch 40/50

Epoch 41/50

Epoch 42/50

Epoch 43/50

Epoch 44/50

Epoch 45/50

Epoch 46/50

Epoch 47/50

Epoch 48/50

Epoch 49/50

Epoch 50/50



<tensorflow.python.keras._impl.keras.callbacks.History at 0x114b6b390>

## Prediction

In [24]:
y_pred = model.predict(scaled_X_test)

In [25]:
y_pred

array([[0.9809176 , 0.01908247],
       [0.96836925, 0.03163073],
       [0.623644  , 0.37635595],
       ...,
       [0.98996454, 0.01003543],
       [0.49396327, 0.5060367 ],
       [0.57035345, 0.42964652]], dtype=float32)

In [26]:
from sklearn.metrics import classification_report

In [27]:
y_pred = [np.argmax(x) for x in y_pred]

In [28]:
print(classification_report(y_test, y_pred))

             precision    recall  f1-score   support

          0       0.87      0.96      0.92      2378
          1       0.76      0.47      0.58       622

avg / total       0.85      0.86      0.85      3000



So should we say goodbye to that customer ?

```
Geography: France
Credit Score: 600
Gender: Male
Age: 40 years old
Tenure: 3 years
Balance: $60000
Number of Products: 2
Does this customer have a credit card ? Yes
Is this customer an Active Member: Yes
Estimated Salary: $50000
```

In [29]:
geography_encoder.transform(['France'])

array([0])

In [30]:
gender_encoder.transform(['Male'])

array([1])

In [31]:
print(X.iloc[0])
print(X.iloc[0].values)

CreditScore           619.00
Geography               0.00
Gender                  0.00
Age                    42.00
Tenure                  2.00
Balance                 0.00
NumOfProducts           1.00
HasCrCard               1.00
IsActiveMember          1.00
EstimatedSalary    101348.88
Name: 0, dtype: float64
[6.1900000e+02 0.0000000e+00 0.0000000e+00 4.2000000e+01 2.0000000e+00
 0.0000000e+00 1.0000000e+00 1.0000000e+00 1.0000000e+00 1.0134888e+05]


In [32]:
new_data = np.array([
    600,
    geography_encoder.transform(['France']),
    gender_encoder.transform(['Male']),
    40,
    3,
    60000,
    2,
    1,
    1,
    50000
])

In [33]:
new_data = one_hot_encoder.transform([new_data]).toarray()

In [34]:
print(new_data)

[[1.e+00 0.e+00 0.e+00 6.e+02 1.e+00 4.e+01 3.e+00 6.e+04 2.e+00 1.e+00
  1.e+00 5.e+04]]


In [35]:
model.predict(new_data)

array([[0., 1.]], dtype=float32)

In [36]:
y_pred = [np.argmax(x) for x in model.predict(new_data)]

In [37]:
y_pred

[1]

In [38]:
classifier = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=get_classifier, batch_size=12, epochs=50)

In [39]:
accuracies = cross_val_score(estimator=classifier, X=scaled_X_train, y=y_train, cv=2)

Epoch 1/50

Epoch 2/50

Epoch 3/50

Epoch 4/50

Epoch 5/50

Epoch 6/50

Epoch 7/50

Epoch 8/50

Epoch 9/50

Epoch 10/50

Epoch 11/50

Epoch 12/50

Epoch 13/50

Epoch 14/50

Epoch 15/50

Epoch 16/50

Epoch 17/50

Epoch 18/50

Epoch 19/50

Epoch 20/50

Epoch 21/50

Epoch 22/50

Epoch 23/50

Epoch 24/50

Epoch 25/50

Epoch 26/50

Epoch 27/50

Epoch 28/50

Epoch 29/50

Epoch 30/50

Epoch 31/50

Epoch 32/50

Epoch 33/50

Epoch 34/50

Epoch 35/50

Epoch 36/50

Epoch 37/50

Epoch 38/50

Epoch 39/50

Epoch 40/50

Epoch 41/50

Epoch 42/50

Epoch 43/50

Epoch 44/50

Epoch 45/50

Epoch 46/50

Epoch 47/50

Epoch 48/50

Epoch 49/50

Epoch 50/50


Epoch 1/50

Epoch 2/50

Epoch 3/50

Epoch 4/50

Epoch 5/50

Epoch 6/50

Epoch 7/50

Epoch 8/50

Epoch 9/50

Epoch 10/50

Epoch 11/50

Epoch 12/50

Epoch 13/50

Epoch 14/50

Epoch 15/50

Epoch 16/50

Epoch 17/50

Epoch 18/50

Epoch 19/50

Epoch 20/50

Epoch 21/50

Epoch 22/50

Epoch 23/50

Epoch 24/50

Epoch 25/50

Epoch 26/50

Epoch 27/50

Epoch 28/50

Epo

In [40]:
accuracies.mean()

0.849857144185475

In [41]:
accuracies.std()

0.0027142855439867275