In [1]:
# Part -1  Data preprocessing

#Importing libs
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
# Importing the dataset

df = pd.read_csv('Churn_Modelling.csv')
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
#General characteristics of the data
def rstr(df): 
    obs = df.shape[0]
    types = df.dtypes
    counts = df.apply(lambda x: x.count())
    uniques = df.apply(lambda x: x.unique())
    nulls = df.apply(lambda x: x.isnull().sum())
    distincts = df.apply(lambda x: x.unique().shape[0])
    print('Data shape:', df.shape)

    cols = ['types', 'counts', 'distincts', 'nulls', 'uniques']
    str = pd.concat([types, counts, distincts, nulls, uniques], axis = 1, sort=True)

    str.columns = cols
    dtypes = str.types.value_counts()
    print('___________________________\nData types:\n',str.types.value_counts())
    print('___________________________')
    return str

details = rstr(df)
display(details)

Data shape: (10000, 14)
___________________________
Data types:
 int64      9
object     3
float64    2
Name: types, dtype: int64
___________________________


Unnamed: 0,types,counts,distincts,nulls,uniques
Age,int64,10000,70,0,"[42, 41, 39, 43, 44, 50, 29, 27, 31, 24, 34, 2..."
Balance,float64,10000,6382,0,"[0.0, 83807.86, 159660.8, 125510.82, 113755.78..."
CreditScore,int64,10000,460,0,"[619, 608, 502, 699, 850, 645, 822, 376, 501, ..."
CustomerId,int64,10000,10000,0,"[15634602, 15647311, 15619304, 15701354, 15737..."
EstimatedSalary,float64,10000,9999,0,"[101348.88, 112542.58, 113931.57, 93826.63, 79..."
Exited,int64,10000,2,0,"[1, 0]"
Gender,object,10000,2,0,"[Female, Male]"
Geography,object,10000,3,0,"[France, Spain, Germany]"
HasCrCard,int64,10000,2,0,"[1, 0]"
IsActiveMember,int64,10000,2,0,"[1, 0]"


In [4]:
#Create dummy variables
geography = pd.get_dummies(df['Geography'],drop_first=True)
gender = pd.get_dummies(df['Gender'],drop_first=True)


## Concatenate the Data Frames

df = pd.concat([df,geography,gender],axis=1)

## Drop Unnecessary columns
df = df.drop(['Geography','Gender'],axis=1)

In [5]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Germany,Spain,Male
0,1,15634602,Hargrave,619,42,2,0.0,1,1,1,101348.88,1,0,0,0
1,2,15647311,Hill,608,41,1,83807.86,1,0,1,112542.58,0,0,1,0
2,3,15619304,Onio,502,42,8,159660.8,3,1,0,113931.57,1,0,0,0
3,4,15701354,Boni,699,39,1,0.0,2,0,0,93826.63,0,0,0,0
4,5,15737888,Mitchell,850,43,2,125510.82,1,1,1,79084.1,0,0,1,0


In [6]:
# Splitting the dataset into the Training set and Test set
X = df.drop(['RowNumber','CustomerId','Surname','Exited'],axis=1)
y = df.iloc[:, 11]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 57)

In [7]:
X_train.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Germany,Spain,Male
4879,783,44,3,81811.71,1,1,0,164213.53,0,1,0
7414,737,45,2,99169.67,2,1,1,78650.95,1,0,0
5968,526,28,1,112070.44,1,0,1,126281.83,0,0,1
677,739,45,7,102703.62,1,0,1,147802.94,1,0,1
4583,624,38,8,0.0,2,1,0,95403.41,0,1,0


In [8]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

***Starting with ANN***

In [9]:
#Hyperparameter tuning
def build_model(hp):
    model = keras.Sequential()
    for i in range(hp.Int('num_layers', 2, 20)):
        model.add(layers.Dense(units=hp.Int('units_' + str(i),
                                            min_value=32,
                                            max_value=512,
                                            step=32),
                               activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
        loss='binary_crossentropy',
        metrics=['accuracy'])
    return model

In [10]:
#Random Search
from tensorflow import keras
from tensorflow.keras import layers
from kerastuner.tuners import RandomSearch
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=3,
    directory='project1',
    project_name='Churn')
tuner.search_space_summary()

Search space summary
Default search space size: 4
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 2, 'max_value': 20, 'step': 1, 'sampling': None}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
units_1 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}


In [11]:
tuner.search(X_train, y_train,
             epochs=5,
             validation_data=(X_test, y_test))

Trial 5 Complete [00h 00m 20s]
val_accuracy: 0.8570666710535685

Best val_accuracy So Far: 0.8570666710535685
Total elapsed time: 00h 01m 06s
INFO:tensorflow:Oracle triggered exit


In [15]:
tuner.get_best_hyperparameters

<bound method BaseTuner.get_best_hyperparameters of <kerastuner.tuners.randomsearch.RandomSearch object at 0x000001E04FDE53A0>>

In [17]:
tuner.get_best_models()

[<tensorflow.python.keras.engine.sequential.Sequential at 0x1e05f0535b0>]