In [1]:
import copy
import itertools
import logging
import os

#Set TensorFlow logging level to suppress warnings
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
#Set absl logging level to suppress warnings
logging.getLogger('absl').setLevel(logging.ERROR)

#Standard Data Packages
import pandas as pd
import numpy as np

#Visualization Packages
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="darkgrid")

#tf and keras
import tensorflow as tf
from keras import models
from keras import layers

#Scikit
from sklearn import metrics
from sklearn.model_selection import KFold
from sklearn import preprocessing


In [2]:
# Read the training and validation splits (paths are relative to the notebook's working directory).
train_set, val_set = (
    pd.read_csv(csv_path)
    for csv_path in ('../data/train_set.csv', '../data/val_set.csv')
)

In [3]:
# Columns fed to the model; "Exited" is the binary target column.
features = [
    "CreditScore", "Geography", "Gender", "Age", "Tenure",
    "Balance", "NumOfProducts", "HasCrCard", "IsActiveMember", "EstimatedSalary",
]

# Split each frame into its feature matrix and its label vector.
X_train, Y_train = train_set[features], train_set["Exited"]
X_val, Y_val = val_set[features], val_set["Exited"]

In [4]:
# Preview the first rows of the selected training features.
X_train.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,585,france,female,53.0,2,0.0,2,1.0,1.0,91830.75
1,606,france,male,24.0,2,0.0,1,0.0,1.0,90876.95
2,633,germany,male,44.0,1,118907.6,1,1.0,0.0,47777.15
3,602,germany,male,40.0,2,149961.99,2,1.0,1.0,82696.84
4,645,france,male,31.0,4,132351.29,1,1.0,0.0,151887.16


In [5]:
# Column groups: the numeric group is min-max scaled and the categorical group is
# label-encoded later in the notebook, so the two are kept apart here.
numeric_features = ["CreditScore", "Age", "Tenure", "Balance", "NumOfProducts", "EstimatedSalary"]
categorical_features = ["Geography", "Gender", "HasCrCard", "IsActiveMember"]

X_train_numeric, X_train_categorical = X_train[numeric_features], X_train[categorical_features]
X_val_numeric, X_val_categorical = X_val[numeric_features], X_val[categorical_features]

In [6]:
# Show the head of the numeric and categorical training frames, one after the other.
display(X_train_numeric.head(), X_train_categorical.head())

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,EstimatedSalary
0,585,53.0,2,0.0,2,91830.75
1,606,24.0,2,0.0,1,90876.95
2,633,44.0,1,118907.6,1,47777.15
3,602,40.0,2,149961.99,2,82696.84
4,645,31.0,4,132351.29,1,151887.16


Unnamed: 0,Geography,Gender,HasCrCard,IsActiveMember
0,france,female,1.0,1.0
1,france,male,0.0,1.0
2,germany,male,1.0,0.0
3,germany,male,1.0,1.0
4,france,male,1.0,0.0


In [7]:
# Convert the feature frames to NumPy arrays for positional (row-index) slicing.
# np.asarray accepts both DataFrames and ndarrays, so re-running this cell is a
# harmless no-op instead of failing with "ndarray has no attribute to_numpy".
X_train_numeric = np.asarray(X_train_numeric)
X_train_categorical = np.asarray(X_train_categorical)

X_val_numeric = np.asarray(X_val_numeric)
X_val_categorical = np.asarray(X_val_categorical)

In [8]:
def build_model2(hidden_layers = [32], activation = "relu", optimizer = "SGD", learning_rate = 0.01):
    """Build and compile a feed-forward binary classifier with one named input per feature.

    Args:
        hidden_layers: Unit counts of the Dense hidden layers, in order. The list is
            only iterated, never mutated.
        activation: Activation applied to every hidden layer.
        optimizer: "SGD", "Adam", "RMS" or "RMSProp" (matched case-insensitively) to
            have that optimizer constructed with `learning_rate`. Any other value is
            handed to `model.compile` unchanged, in which case `learning_rate` is
            not applied by this function.
        learning_rate: Learning rate for the optimizers constructed here.

    Returns:
        A compiled `tf.keras.Model` named "model2" with a single sigmoid output
        layer ("Dx"), binary cross-entropy loss and an accuracy metric.
    """
    # Start from a clean Keras state and a fixed seed before any layer is created.
    tf.keras.backend.clear_session()
    tf.random.set_seed(0)

    # One scalar input per feature. The order fixes the column order of the
    # concatenated feature vector, so it must stay stable.
    input_names = [
        "CreditScore", "Geography", "Gender", "Age", "Tenure",
        "Balance", "NumOfProducts", "HasCrCard", "IsActiveMember", "EstimatedSalary",
    ]
    inputs = [
        tf.keras.layers.Input(shape = (1, ), dtype = tf.float64, name = input_name)
        for input_name in input_names
    ]

    curr_layer = tf.keras.layers.Concatenate()(inputs)

    for units in hidden_layers:
        curr_layer = tf.keras.layers.Dense(units = units, activation = activation)(curr_layer)

    #add output layer
    dx = tf.keras.layers.Dense(units = 1, activation = "sigmoid", name = "Dx")(curr_layer)

    #configure model
    model = tf.keras.Model(inputs = inputs, outputs = dx, name = "model2")

    # Resolve the optimizer name here so that learning_rate is honoured. Both "RMS"
    # and "RMSProp" are accepted; previously only "RMS" matched, so "RMSProp" reached
    # compile() as a bare string and the requested learning rate was dropped.
    optimizer_classes = {
        "sgd": tf.keras.optimizers.SGD,
        "adam": tf.keras.optimizers.Adam,
        "rms": tf.keras.optimizers.RMSprop,
        "rmsprop": tf.keras.optimizers.RMSprop,
    }
    if isinstance(optimizer, str):
        optimizer_class = optimizer_classes.get(optimizer.lower())
        if optimizer_class is not None:
            optimizer = optimizer_class(learning_rate = learning_rate)

    model.compile(loss = 'binary_crossentropy', optimizer = optimizer, metrics = ['accuracy'])

    return model

In [9]:
# Hyperparameter grid: every combination of the values below is a candidate.
epoch_list = [10, 15, 20, 30]          # training epochs
batch_list = [32, 64, 128]             # mini-batch sizes

# Hidden-layer layouts, one list of Dense unit counts per candidate.
layers_list = [
    [32, 16],
    [64, 32],
    [128, 64],
    [32, 16, 8],
    [64, 32, 16],
    [128, 64, 32],
    [32, 64, 128, 256, 128, 64, 32],
]

activation_list = ["relu", "tanh", "elu"]
optimizer_list = ["SGD", "Adam", "RMSProp"]
learning_list = [0.01, 0.001]

In [None]:
def _named_inputs(num_features, cat_features):
    """Return the {input name: column} dict keyed like build_model2's Input layers.

    Column i of `num_features` / `cat_features` is paired with
    `numeric_features[i]` / `categorical_features[i]`.
    """
    named = {name: num_features[:, i] for i, name in enumerate(numeric_features)}
    named.update({name: cat_features[:, i] for i, name in enumerate(categorical_features)})
    return named


def _prepare_fold(train_ids, val_ids):
    """Scale and label-encode one CV fold together with the hold-out validation set.

    The MinMaxScaler and the per-column LabelEncoders are fit on the fold's training
    rows only, then applied to the fold's validation rows and to the hold-out arrays.

    Returns:
        Dict with the model-ready inputs ("train_x", "fold_val_x", "holdout_x") and
        the matching fold labels ("train_y", "fold_val_y").
    """
    scaler = preprocessing.MinMaxScaler().fit(X_train_numeric[train_ids])
    train_num = scaler.transform(X_train_numeric[train_ids])
    fold_val_num = scaler.transform(X_train_numeric[val_ids])
    holdout_num = scaler.transform(X_val_numeric)

    # Index-array selection and .copy() both produce fresh arrays, so the in-place
    # encoding below never touches X_train_categorical / X_val_categorical.
    train_cat = X_train_categorical[train_ids]
    fold_val_cat = X_train_categorical[val_ids]
    holdout_cat = X_val_categorical.copy()

    for col in range(len(categorical_features)):
        encoder = preprocessing.LabelEncoder().fit(train_cat[:, col])
        train_cat[:, col] = encoder.transform(train_cat[:, col])
        fold_val_cat[:, col] = encoder.transform(fold_val_cat[:, col])
        holdout_cat[:, col] = encoder.transform(holdout_cat[:, col])

    return {
        "train_x": _named_inputs(train_num.astype('float64'), train_cat.astype('float64')),
        # KFold yields positional row numbers, hence .iloc rather than label lookup.
        "train_y": Y_train.iloc[train_ids],
        "fold_val_x": _named_inputs(fold_val_num.astype('float64'), fold_val_cat.astype('float64')),
        "fold_val_y": Y_train.iloc[val_ids],
        "holdout_x": _named_inputs(holdout_num.astype('float64'), holdout_cat.astype('float64')),
    }


kfold = KFold(n_splits = 3, shuffle = True, random_state = 0)

# Kept for compatibility with the original cell; nothing below populates them.
fold_losses = {}
fold_accs = {}

bestModel = None
best_acc = 0.0
best_loss = 0.0

# The split is seeded and the preprocessing does not depend on any hyperparameter,
# so each fold is prepared once instead of once per grid point.
prepared_folds = [
    _prepare_fold(train_ids, val_ids)
    for train_ids, val_ids in kfold.split(X_train_numeric)
]

# itertools.product varies the last list fastest, i.e. the same visiting order as
# nesting the loops epochs -> batch -> layers -> activation -> optimizer -> lr.
search_grid = itertools.product(
    epoch_list, batch_list, layers_list, activation_list, optimizer_list, learning_list
)

# NOTE(review): the winner is a single-fold model ranked by its score on the hold-out
# set (Y_val), not by a mean cross-validated score - confirm this is the intended
# selection criterion.
for num_epochs, batch_size, num_layers, activation_func, optimizer, learning_rate in search_grid:
    for fold_i, fold in enumerate(prepared_folds):

        model = build_model2(hidden_layers = num_layers, activation = activation_func, optimizer = optimizer, learning_rate = learning_rate)

        model.fit(
            x = fold["train_x"],
            y = fold["train_y"],
            epochs = num_epochs,
            batch_size = batch_size,
            validation_data = (fold["fold_val_x"], fold["fold_val_y"]),
            verbose = 0)

        val_loss, val_acc = model.evaluate(
            fold["holdout_x"],
            Y_val,
            batch_size = batch_size,
            verbose = 0)

        # Compare at 4 decimals: higher accuracy wins, ties are broken by lower loss.
        acc_now, acc_best = round(val_acc, 4), round(best_acc, 4)
        is_new_best = acc_now > acc_best or (
            acc_now == acc_best and round(val_loss, 4) < round(best_loss, 4)
        )

        if is_new_best:
            print("New Best Model")

            print(f'Epochs: {num_epochs}, Batch Size: {batch_size}, Layers: {str(num_layers)}, Activation: {activation_func}, Optimizer: {optimizer}, Learning Rate: {learning_rate}, Fold: {fold_i}, val acc: {val_acc:.3f}, val loss: {val_loss}')

            bestModel = model
            best_acc = val_acc
            best_loss = val_loss

New Best Model
Epochs: 10, Batch Size: 32, Layers: [32, 16], Activation: relu, Optimizer: SGD, Learning Rate: 0.01, Fold: 0, val acc: 0.840, val loss: 0.38243088126182556
New Best Model
Epochs: 10, Batch Size: 32, Layers: [32, 16], Activation: relu, Optimizer: SGD, Learning Rate: 0.01, Fold: 1, val acc: 0.855, val loss: 0.34353917837142944
New Best Model
Epochs: 10, Batch Size: 32, Layers: [32, 16], Activation: relu, Optimizer: Adam, Learning Rate: 0.01, Fold: 0, val acc: 0.862, val loss: 0.3297117054462433
New Best Model
Epochs: 10, Batch Size: 32, Layers: [32, 16], Activation: relu, Optimizer: Adam, Learning Rate: 0.01, Fold: 1, val acc: 0.866, val loss: 0.3241131007671356
New Best Model
Epochs: 10, Batch Size: 32, Layers: [64, 32], Activation: relu, Optimizer: Adam, Learning Rate: 0.001, Fold: 1, val acc: 0.866, val loss: 0.3228359520435333


In [None]:
# Persist the model picked by the search cell. bestModel is still None (and this
# line fails) if no candidate ever beat the initial best accuracy of 0.0.
bestModel.save("MNN_cv.keras")

In [None]:
# Reload the validation data from a separate CSV and rebuild X_val / Y_val from it.
# NOTE(review): presumably this file holds already scaled/encoded features - confirm
# it matches the preprocessing bestModel was trained with.
# NOTE(review): the path is relative to the working directory, unlike the ../data/
# paths used when the raw splits were loaded - confirm the location.
val_set = pd.read_csv('val_norm_set.csv')

features = [
    "CreditScore", "Geography", "Gender", "Age", "Tenure",
    "Balance", "NumOfProducts", "HasCrCard", "IsActiveMember", "EstimatedSalary",
]

Y_val = val_set["Exited"]
# All ten model inputs are declared as float64, so cast the feature frame to match.
X_val = val_set[features].astype('float64')

In [None]:
# Feed every feature column (kept 2-D via the double brackets) to the model input
# that carries the same name.
predict_val = bestModel.predict({name: X_val[[name]] for name in features})

# ROC curve and AUC are computed from the raw predicted probabilities.
auc = metrics.roc_auc_score(Y_val, predict_val)
fpr, tpr, _ = metrics.roc_curve(Y_val, predict_val)
plt.plot(fpr, tpr, label = "data 1, auc = " + str(auc))
plt.legend(loc = 4)

# Threshold the probabilities in place at 0.5; predict_val holds hard 0/1 labels
# from here on.
predict_val[predict_val > 0.5] = 1
predict_val[predict_val <= 0.5] = 0

print(metrics.classification_report(Y_val, predict_val))

plt.show()

In [None]:
# Confusion matrix of the thresholded predictions against the true labels.
confusion_matrix = metrics.confusion_matrix(Y_val, predict_val)

plt.figure(figsize = (9,9))
# The cells are integer counts, so annotate with the integer format "d" rather than
# ".3f", which rendered every count with three meaningless decimals.
sns.heatmap(confusion_matrix, annot = True, fmt = "d", linewidths=.5, square = True, cmap = "Blues_r")
plt.ylabel("Actual label")
plt.xlabel("Predicted label")
plt.title("Confusion Matrix", size = 15)

plt.show()