In [187]:
import keras
from keras import Sequential
from keras import layers
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [188]:
# Read the churn table from the CSV file that sits next to the notebook,
# then preview the first rows to check the columns that were parsed.
churn_data = pd.read_csv(filepath_or_buffer='./churn.csv')
churn_data.head(n=5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [189]:

# Split the raw table into the target and the feature matrix.
# Y is the 'Exited' column; X is every remaining column except the three
# columns dropped alongside it (RowNumber, CustomerId, Surname).
Y = churn_data['Exited']
X = churn_data.drop(columns=['CustomerId', 'Surname', 'RowNumber', 'Exited'])

X.head(n=5)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,699,France,Female,39,1,0.0,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1


In [190]:
# Preview the first five target values.
Y.head(n=5)

0    1
1    0
2    1
3    0
4    0
Name: Exited, dtype: int64

In [191]:
# Replace the two string-valued columns with integer codes.
# A fresh LabelEncoder is fitted per column; after the loop `lb` refers to the
# encoder fitted on 'Gender' (the last column processed).
# Geography is only turned into integer codes here; the following cell
# one-hot encodes those codes.
for column in ['Geography', 'Gender']:
    lb = LabelEncoder()
    X[column] = lb.fit_transform(X[column])

X.head(n=5)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,0,0,42,2,0.0,1,1,1,101348.88
1,608,2,0,41,1,83807.86,1,0,1,112542.58
2,502,0,0,42,8,159660.8,3,1,0,113931.57
3,699,0,0,39,1,0.0,2,0,0,93826.63
4,850,2,0,43,2,125510.82,1,1,1,79084.1


In [192]:
# One-hot encode the integer-coded Geography column. drop_first=True omits the
# indicator for the first category, leaving Geography_1 and Geography_2.
# NOTE: this cell consumes the 'Geography' column, so it cannot be re-run
# without first re-running the cells that build X.
X = pd.get_dummies(data=X, columns=['Geography'], drop_first=True)

X.head(n=5)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_1,Geography_2
0,619,0,42,2,0.0,1,1,1,101348.88,False,False
1,608,0,41,1,83807.86,1,0,1,112542.58,False,True
2,502,0,42,8,159660.8,3,1,0,113931.57,False,False
3,699,0,39,1,0.0,2,0,0,93826.63,False,False
4,850,0,43,2,125510.82,1,1,1,79084.1,False,True


In [193]:
# Target proportions of the full data set for each partition.
train_ratio = 0.75
validation_ratio = 0.15
test_ratio = 0.10

# Stage 1: hold out (1 - train_ratio) = 25% of the rows; train keeps 75%.
# random_state is fixed here as well as in stage 2 so that the whole
# train/validation/test partition is identical on every run; stratify keeps
# the class balance of Y in both parts.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=1 - train_ratio,
    random_state=0,
    stratify=Y,
)

# Stage 2: divide the 25% hold-out into validation and test.
# test_ratio / (test_ratio + validation_ratio) = 0.4 of the hold-out, so
# test ends up as 10% and validation as 15% of the initial data set.
X_val, X_test, Y_val, Y_test = train_test_split(
    X_test,
    Y_test,
    test_size=test_ratio / (test_ratio + validation_ratio),
    random_state=0,
    stratify=Y_test,
)

In [194]:
# Standardise the features. The scaler's statistics are learned from the
# training split only and then applied unchanged to validation and test,
# so no information from the held-out data reaches the fit.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [207]:
# Seed the Python, NumPy and backend random number generators before the
# model is created so that training starts from the same state on every
# fresh run (some backend ops may still be nondeterministic).
keras.utils.set_random_seed(0)

# Binary classifier: one hidden ReLU layer of 128 units followed by a single
# sigmoid unit that outputs the predicted probability of the positive class.
model = Sequential(layers=[
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Train on the scaled training split and monitor the validation split after
# each epoch.
# NOTE(review): the saved output of this cell shows "Epoch 1/20" while the
# code requests epochs=10 -- the log is from an earlier run; re-run the cell
# (or confirm the intended epoch count) so code and output agree.
history = model.fit(
    X_train,
    Y_train,
    batch_size=10,
    epochs=10,
    validation_data=(X_val, Y_val),
    verbose=1
)

Epoch 1/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 820us/step - accuracy: 0.7670 - loss: 0.4803 - val_accuracy: 0.8453 - val_loss: 0.3936
Epoch 2/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 671us/step - accuracy: 0.8424 - loss: 0.3767 - val_accuracy: 0.8527 - val_loss: 0.3730
Epoch 3/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 678us/step - accuracy: 0.8573 - loss: 0.3501 - val_accuracy: 0.8553 - val_loss: 0.3671
Epoch 4/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 676us/step - accuracy: 0.8601 - loss: 0.3381 - val_accuracy: 0.8473 - val_loss: 0.3716
Epoch 5/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 678us/step - accuracy: 0.8585 - loss: 0.3406 - val_accuracy: 0.8573 - val_loss: 0.3620
Epoch 6/20
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 682us/step - accuracy: 0.8604 - loss: 0.3428 - val_accuracy: 0.8587 - val_loss: 0.3638
Epoch 7/20
[1m7

In [204]:
# Score the trained model on the held-out test split. With the compile
# settings used for `model`, evaluate returns [loss, accuracy].
# NOTE(review): this cell's execution count (204) is lower than the training
# cell's (207), so the saved numbers come from an earlier fit -- re-run after
# training.
results = model.evaluate(x=X_test, y=Y_test)
print("test loss, test acc:", results)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 613us/step - accuracy: 0.8801 - loss: 0.3036
test loss, test acc: [0.3473355174064636, 0.8600000143051147]
