In [18]:
import pandas as pd
import numpy as np

import tensorflow as tf 
#random_state = 10
#np.random.seed(random_state)
#tf.random.set_seed(random_state)


# Matplotlib for visualization
from matplotlib import pyplot as plt
# display plots in the notebook
%matplotlib inline

# Seaborn for easier visualization
import seaborn as sns
### sns.set_style('darkgrid')

# store elements as dictionary keys and their counts as dictionary values
from collections import Counter

# scikit-learn
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, LabelEncoder, LabelBinarizer
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV

# Function for creating model pipelines - imblearn
from imblearn.pipeline import make_pipeline as imbl_pipe

# Over-sampling using SMOTE
from imblearn.over_sampling import SMOTE

# Classification metrics
from sklearn.metrics import confusion_matrix, classification_report

# Keras
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.layers import Dense, Input, Dropout
from tensorflow.keras.models import Sequential

In [9]:
# Load the churn dataset and discard the identifier-like columns
# (row number, customer id, surname), which are not used as features below.
id_columns = ['RowNumber', 'CustomerId', 'Surname']
df = pd.read_csv('../Churn_Modelling.csv').drop(columns=id_columns)
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [10]:
# Separate the DataFrame into the feature matrix X (every column except
# 'Exited') and the response vector y (the 'Exited' column).
X = df.drop(columns='Exited')
y = df['Exited']
print(X.shape, y.shape)

(10000, 10) (10000,)


In [11]:
# Names of the numeric feature columns, in the order they appear in X.
numeric_part = X.select_dtypes(include='number')
num_columns = numeric_part.columns.tolist()
num_columns

['CreditScore',
 'Age',
 'Tenure',
 'Balance',
 'NumOfProducts',
 'HasCrCard',
 'IsActiveMember',
 'EstimatedSalary']

In [12]:
# Names of the categorical (object-dtype) feature columns, in the order they appear in X.
categorical_part = X.select_dtypes(include='object')
cat_columns = categorical_part.columns.tolist()
cat_columns

['Geography', 'Gender']

In [13]:
# Keep 70% of the rows for training and hold out the rest for testing;
# the fixed random_state makes the split repeatable between runs.
split = train_test_split(
    X,
    y,
    train_size=0.7,
    random_state=1,
)
X_train, X_test, y_train, y_test = split
X_train.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
2228,644,France,Female,37,8,0.0,2,1,0,20968.88
5910,481,France,Female,39,6,0.0,1,1,1,24677.54
1950,680,France,Female,37,10,123806.28,1,1,0,81776.84
2119,690,France,Male,29,5,0.0,2,1,0,108577.97
5947,656,France,Female,45,7,145933.27,1,1,1,199392.14


In [14]:
# Translate the feature names into positional column indices of X; the
# column transformer below selects its inputs by these positions.
num_features = [X.columns.get_loc(name) for name in num_columns]
print(num_features)

cat_features = [X.columns.get_loc(name) for name in cat_columns]
print(cat_features)

# Min-max scale the numeric columns and one-hot encode the categorical ones
# (sparse=False asks the encoder for a dense array).
preprocess = make_column_transformer(
    (MinMaxScaler(), num_features),
    (OneHotEncoder(sparse=False), cat_features),
)
preprocess

[0, 3, 4, 5, 6, 7, 8, 9]
[1, 2]


ColumnTransformer(transformers=[('minmaxscaler', MinMaxScaler(),
                                 [0, 3, 4, 5, 6, 7, 8, 9]),
                                ('onehotencoder', OneHotEncoder(sparse=False),
                                 [1, 2])])

In [22]:
# Baseline pipeline WITHOUT SMOTE: preprocessing feeds straight into the Keras classifier.
import keras_model

# Wrap keras_model.create_model so the network can be used as a scikit-learn
# style estimator (and therefore inside a pipeline / grid search).
keras_clf = KerasClassifier(build_fn=keras_model.create_model, verbose=0)

model = imbl_pipe(preprocess, keras_clf)
model

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('minmaxscaler',
                                                  MinMaxScaler(),
                                                  [0, 3, 4, 5, 6, 7, 8, 9]),
                                                 ('onehotencoder',
                                                  OneHotEncoder(sparse=False),
                                                  [1, 2])])),
                ('kerasclassifier',
                 <tensorflow.python.keras.wrappers.scikit_learn.KerasClassifier object at 0x00000213C026B308>)])

In [23]:
# Hyper-parameter grid for the grid search. Every key is prefixed with the
# pipeline step name ('kerasclassifier') followed by a double underscore so the
# value is routed to that step.
step_name = 'kerasclassifier'
candidates = {
    'epochs': [10],
    'n_units': [64, 128, 100],
    # Further options that are currently switched off:
    # 'init': ['uniform', 'zeros', 'normal'],
    # 'batch_size': [4, 16, 32],
    # 'optimizer': ['RMSprop', 'Adam', 'Adamax', 'sgd'],
    'dropout': [0.5, 0.3, 0.2, 0.1, 0],
    'verbose': [0],
}
param_grid = {f'{step_name}__{name}': values for name, values in candidates.items()}

In [24]:
# 10-fold cross-validated grid search over the baseline pipeline, ranked by accuracy.
dl_grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=10,
)
dl_grid.fit(X_train, y_train)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


KeyboardInterrupt: 

In [None]:
# Report the best hyper-parameter combination and its cross-validated score, one per line.
print(dl_grid.best_params_, dl_grid.best_score_, sep='\n')

In [None]:
# Score the refitted best estimator on the training split, then on the held-out split.
train_score = dl_grid.score(X_train, y_train)
print(f"Training Data Score: {train_score}")
test_score = dl_grid.score(X_test, y_test)
print(f"Testing Data Score: {test_score}")

In [None]:
# Predict on the held-out split and summarise the errors:
# first the raw confusion matrix, then the per-class metrics report.
predictions = dl_grid.predict(X_test)
cm = confusion_matrix(y_test, predictions)
report = classification_report(y_test, predictions)
print(cm)
print(report)

In [None]:
# Pipeline WITH SMOTE: preprocess, resample the training data with SMOTE,
# then fit the Keras classifier.
#
# The notebook-level `random_state` assignment is commented out in the imports
# cell, so referencing it here raised NameError on a fresh kernel. Use an
# explicit seed for SMOTE instead (same value as the commented-out assignment).
SMOTE_SEED = 10
model2 = imbl_pipe(
    preprocess,
    SMOTE(sampling_strategy='auto', random_state=SMOTE_SEED),
    keras_clf,
)

model2

In [None]:
# Repeat the 10-fold, accuracy-ranked grid search, this time over the SMOTE pipeline.
# NOTE: this rebinds dl_grid, so the baseline search result is no longer reachable afterwards.
dl_grid = GridSearchCV(
    estimator=model2,
    param_grid=param_grid,
    scoring='accuracy',
    cv=10,
)
dl_grid.fit(X_train, y_train)

In [None]:
# Best hyper-parameter combination and cross-validated score for the SMOTE pipeline search.
best_params = dl_grid.best_params_
best_score = dl_grid.best_score_
print(best_params)
print(best_score)

In [None]:
# Score the SMOTE pipeline's best estimator on the training split, then on the held-out split.
train_score = dl_grid.score(X_train, y_train)
print(f"Training Data Score: {train_score}")
test_score = dl_grid.score(X_test, y_test)
print(f"Testing Data Score: {test_score}")

In [None]:
# Held-out predictions from the SMOTE pipeline, summarised as a confusion
# matrix followed by the per-class metrics report.
predictions = dl_grid.predict(X_test)
cm = confusion_matrix(y_test, predictions)
report = classification_report(y_test, predictions)
print(cm)
print(report)