# Keras - binary classification - customer churn

### Setup notebook

In [None]:
# Importing the libraries
#import os, sys
#from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from mymods.lauthom import *

%matplotlib inline

### Import data

In [None]:
# Locate the churn CSV (get_path is presumably provided by the mymods.lauthom star import)
file = get_path('_data', 'Churn_Modelling')
file  # bare name so the notebook echoes the value

In [None]:
# Load the churn table into a DataFrame and preview its first five rows
dataset = pd.read_csv(filepath_or_buffer=file)
dataset.head(n=5)

In [None]:
# The first three columns do not contain data for the model, so the features
# are columns 3..12; column 13 is the target.
feature_cols = slice(3, 13)
target_col = 13
X = dataset.iloc[:, feature_cols].values
y = dataset.iloc[:, target_col].values

In [None]:
# Peek at the first five feature rows (categorical columns are not encoded yet)
X[:5]

In [None]:
# Replace the two categorical columns (country, sex) with integer codes
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# one encoder object per column so each keeps its own fitted classes
labelencoder_X_1, labelencoder_X_2 = LabelEncoder(), LabelEncoder()

country_codes = labelencoder_X_1.fit_transform(X[:, 1])  # column 1 - country
sex_codes = labelencoder_X_2.fit_transform(X[:, 2])      # column 2 - sex

X[:, 1] = country_codes
X[:, 2] = sex_codes

In [None]:
# Same preview after label encoding; the result is not stored, so the 'i'
# (integer) cast only affects what is displayed
X[:5].astype('i')

#### Dummy vars

In [None]:
# Create dummy variables for the multiclass feature at column index 1 (country).
# OneHotEncoder's `categorical_features` argument was deprecated in
# scikit-learn 0.20 and removed in 0.22, so encode that single column on its
# own and stack the result in front of the remaining columns -- the same
# column layout (and float dtype) the old argument produced.
onehotencoder = OneHotEncoder()
country_dummies = onehotencoder.fit_transform(X[:, [1]].astype(int)).toarray()
X = np.hstack([country_dummies, np.delete(X, 1, axis=1)]).astype(float)

# Delete the first dummy column to avoid the dummy variable trap:
# [1,0,0] == [0,0] => 2 dummy vars define 3 classes
X = X[:, 1:]

In [None]:
# Preview the first rows again after the dummy-variable step
# (integer cast is for display only; X itself is unchanged)
X[:5].astype('i')

#### Train test split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
split = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split

#### Feature Scaling

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, then apply those same statistics
# to both sets. `sc` is an object with state and is reused for predictions.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

## Keras NN

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout

### Build model

In [None]:
# Shared kernel initializer: truncated normal (mean 0, stddev 0.05), seeded
init = keras.initializers.TruncatedNormal(mean=0.0, stddev=0.05, seed=13)

# Declare the whole stack at once; Sequential adds the layers in list order.
# Every Dropout has rate 0, so it currently drops nothing.
classifier = Sequential([
    # input layer (11 features) and first hidden layer
    Dense(units=6, kernel_initializer=init, activation='relu', input_dim=11),
    Dropout(0),

    # second hidden layer
    Dense(units=8, kernel_initializer=init, activation='relu'),
    Dropout(0),

    # third hidden layer
    Dense(units=4, kernel_initializer=init, activation='relu'),
    Dropout(0),

    # output layer: one sigmoid unit for the binary target
    Dense(units=1, kernel_initializer=init, activation='sigmoid'),
])

### Compile model

In [None]:
# Configure training of the ANN: Adam optimizer, binary cross-entropy loss,
# accuracy reported as the metric
compile_options = dict(optimizer='adam',
                       loss='binary_crossentropy',
                       metrics=['accuracy'])
classifier.compile(**compile_options)

#### Model architecture

In [None]:
# Print the layer-by-layer overview of the compiled model
classifier.summary()

### Train model

In [None]:
# Training constants (also passed to the KerasClassifier wrapper further down)
EPOCHS, BATCH_SIZE = 40, 16

classifier.fit(x=X_train, y=y_train, batch_size=BATCH_SIZE, epochs=EPOCHS)

### Prediction

In [None]:
# Threshold the sigmoid outputs at 0.5 to obtain boolean class predictions
y_pred = classifier.predict(X_test) > 0.5

### Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred)

# Accuracy = correctly classified samples (the diagonal) / all samples
accuracy = np.trace(cm) / float(np.sum(cm))

# A notebook cell only echoes its final expression, so a bare `cm` in the
# middle of the cell is silently discarded; show both results together.
cm, accuracy

### Predict new data

In [None]:
# Predict churn for a single new customer.
# The row must go through the same preprocessing as the training data: the
# dummy variables are already written out by hand, and scaling reuses the
# fitted scaler object `sc`.
# The leading 0.0 makes NumPy build a floating-point array.
new_customer = np.array([[0.0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]])
new_pred = classifier.predict(sc.transform(new_customer)) > 0.5
new_pred[0][0]

## Evaluating, Improving and Tuning the ANN

Using the scikit-learn wrapper for Keras, so the network can be evaluated with cross-validation and tuned with grid search.

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score, GridSearchCV

In [None]:
def build_classifier():
    """Build and compile the baseline ANN used for evaluation.

    The network takes 11 input features, has two hidden ReLU layers of
    6 units each and a single sigmoid output unit. All kernels use the
    'uniform' initializer. The model is compiled with the Adam optimizer,
    binary cross-entropy loss and the accuracy metric.

    Returns the compiled Sequential model.
    """
    model = Sequential([
        Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=11),
        Dense(units=6, kernel_initializer='uniform', activation='relu'),
        Dense(units=1, kernel_initializer='uniform', activation='sigmoid'),
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Wrap the builder function so scikit-learn utilities can drive the network;
# batch size and epoch count are the constants set for the earlier training run
classifier = KerasClassifier(build_classifier, batch_size=BATCH_SIZE, epochs=EPOCHS)


In [None]:
# This cell held a dangling `classifier.` (a SyntaxError, likely left over from
# tab-completion); inspect the wrapper's stored parameters instead.
classifier.get_params()

In [None]:
# Show the first entry of the model configuration.
# NOTE(review): at this point `classifier` has been rebound to the
# KerasClassifier wrapper, not the Sequential model -- confirm get_config()
# is available on it, or run this against the Sequential model instead.
# `dictify` presumably comes from the mymods.lauthom star import -- verify.
dictify(classifier.get_config()[0])

In [None]:
# accuracies = cross_val_score(estimator=classifier, X=X_train, y=y_train, cv=5, n_jobs=-1)
# print('done - seems to hang')

In [None]:
# mean = accuracies.mean()
# variance = accuracies.std()
# accuracies, mean, variance

### Dropout Regularization to reduce overfitting if needed

In [None]:
def build_classifier(optimizer, initializer, dropout=0.15):
    """Build and compile a tunable ANN for the hyper-parameter search.

    Parameters
    ----------
    optimizer : passed straight to ``compile`` as the optimizer.
    initializer : kernel initializer applied to every Dense layer.
    dropout : rate of the single Dropout layer that follows the second
        hidden layer (default 0.15).

    Returns the compiled Sequential model: 11 inputs, hidden ReLU layers of
    6, 12 and 4 units, and one sigmoid output unit.
    """
    model = Sequential([
        Dense(units=6, kernel_initializer=initializer, activation='relu', input_dim=11),

        Dense(units=12, kernel_initializer=initializer, activation='relu'),
        Dropout(dropout),

        Dense(units=4, kernel_initializer=initializer, activation='relu'),

        Dense(units=1, kernel_initializer=initializer, activation='sigmoid'),
    ])
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Candidate kernel initializers for the grid search, both seeded
truncn = keras.initializers.TruncatedNormal(mean=0.0, stddev=0.05, seed=13)
hen = keras.initializers.he_normal(seed=13)

# No fixed hyper-parameters here; the search grid supplies them per run
classifier = KerasClassifier(build_fn=build_classifier)

In [None]:
# Search space: 2 batch sizes x 2 epoch counts x 2 optimizers x 2 initializers
parameters = dict(
    batch_size=[8, 16],
    epochs=[64, 128],
    optimizer=['adam', 'rmsprop'],
    initializer=[truncn, hen],
)

# Exhaustive search over the grid, scored on accuracy with 5-fold cross-validation
grid_search = GridSearchCV(classifier, parameters, scoring='accuracy', cv=5)

In [None]:
# Run the search on the scaled training data
grid_search = grid_search.fit(X=X_train, y=y_train)

In [None]:
# Keep the winning parameter combination and its cross-validated score
best_parameters, best_accuracy = grid_search.best_params_, grid_search.best_score_