#### Classification Problem using Artificial Neural Network

In [92]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split 

In [127]:
df = pd.read_csv('./churn_modeling.csv')

In [94]:
df.shape

(10000, 14)

In [95]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


##### Apply basic Feature Engineering

In [128]:
# Drop not useful columns
df.drop(['RowNumber','CustomerId', 'Surname'], axis=1, inplace=True)

In [129]:
# Transform Gender using Label Encoder

le = LabelEncoder()
df['Gender'] = le.fit_transform(df['Gender'])

In [117]:
df['Gender'].value_counts()

Gender
1    5457
0    4543
Name: count, dtype: int64

In [130]:
# Transform Geopraphy column using OneHotEncoding

ohe= OneHotEncoder(drop='first', handle_unknown='ignore')
temp_df = ohe.fit_transform(df[['Geography']])
temp_df.toarray()
ohe.get_feature_names_out(['Geography'])


array(['Geography_Germany', 'Geography_Spain'], dtype=object)

In [131]:
temp_df.shape

(10000, 2)

In [132]:
temp_df = pd.DataFrame(temp_df.toarray(), columns=ohe.get_feature_names_out(['Geography']))

In [73]:
temp_df

Unnamed: 0,Geography_Germany,Geography_Spain
0,0.0,0.0
1,0.0,1.0
2,0.0,0.0
3,0.0,0.0
4,0.0,1.0
...,...,...
9995,0.0,0.0
9996,0.0,0.0
9997,0.0,0.0
9998,1.0,0.0


In [133]:
df.drop('Geography', axis=1, inplace=True)
pd.concat([df, temp_df], axis=1)

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_Germany,Geography_Spain
0,619,0,42,2,0.00,1,1,1,101348.88,1,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,1.0
2,502,0,42,8,159660.80,3,1,0,113931.57,1,0.0,0.0
3,699,0,39,1,0.00,2,0,0,93826.63,0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.10,0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,0,0.0,0.0
9996,516,1,35,10,57369.61,1,1,1,101699.77,0,0.0,0.0
9997,709,0,36,7,0.00,1,0,1,42085.58,1,0.0,0.0
9998,772,1,42,3,75075.31,2,1,0,92888.52,1,1.0,0.0


In [134]:
df.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,0,42,2,0.0,1,1,1,101348.88,1
1,608,0,41,1,83807.86,1,0,1,112542.58,0
2,502,0,42,8,159660.8,3,1,0,113931.57,1
3,699,0,39,1,0.0,2,0,0,93826.63,0
4,850,0,43,2,125510.82,1,1,1,79084.1,0


In [135]:
# Split features and target
x = df.drop('Exited', axis=1)
y = df['Exited']

In [136]:
# Split data into train and test
x_train, x_test, y_train, y_test = train_test_split(x,y,random_state=10, test_size=0.2)

In [139]:
x.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,0,42,2,0.0,1,1,1,101348.88
1,608,0,41,1,83807.86,1,0,1,112542.58
2,502,0,42,8,159660.8,3,1,0,113931.57
3,699,0,39,1,0.0,2,0,0,93826.63
4,850,0,43,2,125510.82,1,1,1,79084.1


In [140]:
#Scale x_train, x_test using StandardScaler

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.fit_transform(x_test)

In [141]:
# save label encoder, one hot encoder, standard scaler to pickle file

import pickle

with open('labelEncoder.pkl', 'wb') as file:
    pickle.dump(le,file)


with open('oneHotEncoder.pkl', 'wb') as file:
    pickle.dump(ohe,file)

with open('standardScaler.pkl', 'wb') as file:
    pickle.dump(scaler,file)

##### Artifical Neural Network (ANN) Implementation

Steps:
1)  Initialize a Sequential network
2)  Dense to create hidden neurons specifying number of neurons in each layer
3)  Apply activation functions (sigmoid, relu, tanh, leaky relu, etc), relu and its variants can be used in hidden layers, and depending on classes, sigmoid or softmax can be used
4)  Optimize the weights through Back progation
5)  Minimize the Loss function
6)  Metrics - Measure the accuracy using MSE, MAE, Accuracy, etc  
7)