<a href="https://colab.research.google.com/github/harnalashok/deeplearning/blob/main/Simple_NN_churn_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Last amended: 12th Nov, 2022
# Classification using Simple NN
# Using categorical cross enropy vs sparse cross entropy

In [1]:
# 1.0 Call libraries:
import pandas as pd
import numpy as np

# 1.1
from sklearn.preprocessing import MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

# 1.2
import tensorflow as tf

In [8]:
# 1.3 Mount gdrive:

from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


In [9]:
# 2.0 Read data:

pathToFolder = "/gdrive/MyDrive/Colab_data_files/bank_customers_churn/"

In [10]:
# 2.1
datafile = "Churn_Modelling.csv"

In [87]:
# 2.2
data = pd.read_csv(pathToFolder + datafile)

In [12]:
# 2.2.1
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [88]:
# 2.2.2 Drop some columns:
data.drop(columns = ['RowNumber', 'CustomerId', 'Surname'], inplace = True)

In [14]:
# 2.2.3 Ar there any nulls?
data.isnull().sum()

CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [15]:
# 2.2.4 Number of unique values per column:

for i in data.columns:
  print(i , data[i].nunique())

CreditScore 460
Geography 3
Gender 2
Age 70
Tenure 11
Balance 6382
NumOfProducts 4
HasCrCard 2
IsActiveMember 2
EstimatedSalary 9999
Exited 2


In [89]:
# 3.0 Segregate our features:

catFeatures = ['Geography', 'Gender',  'HasCrCard' , 'IsActiveMember' ]
numFeatures = ['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'EstimatedSalary']

In [None]:
# 3.1 This fails as if does not apply to Series
# https://stackoverflow.com/a/71956180
data['Gender'].apply(lambda x :     1 if x is 'Female' else 0 )

0       0
1       0
2       0
3       0
4       0
       ..
9995    0
9996    0
9997    0
9998    0
9999    0
Name: Gender, Length: 10000, dtype: int64

In [90]:
# 3.2 Separate target and predictors:

y = data.pop('Exited')
X = data

In [91]:
# 3.3 Split dataset:

X_train, X_test, y_train,y_test = train_test_split(X, y ,
                                                   test_size = 0.15,
                                                   shuffle = True
                                                   )

In [92]:
# 4.0 Create a column transformer for transforming features:

ct = ColumnTransformer( 
                         [
                              ('ohe', OneHotEncoder(), catFeatures),
                              ('mm', MinMaxScaler() , numFeatures)
                          ],
                         remainder= 'passthrough'
                        )

In [93]:
# 4.1 fit
ct.fit(X_train)

ColumnTransformer(remainder='passthrough',
                  transformers=[('ohe', OneHotEncoder(),
                                 ['Geography', 'Gender', 'HasCrCard',
                                  'IsActiveMember']),
                                ('mm', MinMaxScaler(),
                                 ['CreditScore', 'Age', 'Tenure', 'Balance',
                                  'NumOfProducts', 'EstimatedSalary'])])

In [94]:
# 4.2 Transform data now
Xtr = ct.transform(X_train)
Xte = ct.transform(X_test)

In [118]:
# 4.3 Also transform target to OneHotEncoded form
#     We will use this form of target also:

ytr = tf.keras.utils.to_categorical(y_train.values.reshape(8500,1))
yte  = tf.keras.utils.to_categorical(y_test)
ytr

array([[1., 0.],
       [1., 0.],
       [0., 1.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]], dtype=float32)

In [119]:
# 5.0 Blank model:

model = tf.keras.Sequential()

In [120]:
# 5.1
# model.add(tf.keras.Input(shape = (10, )))
model.add(tf.keras.Input(shape = (15, )))

In [121]:
# 5.2
model.add(tf.keras.layers.Dense(15, activation = 'relu'))

In [122]:
# 5.3
model.add(tf.keras.layers.Dense(20, activation = 'relu'))

In [123]:
# 5.4
# model.add(tf.keras.layers.Dense(1, activation = 'sigmoid'))  # tanh, No activation
model.add(tf.keras.layers.Dense(2, activation = 'softmax'))  # tanh, No activation

In [None]:
# 5.5
model.summary()

Refer [here](https://stats.stackexchange.com/a/420730)<br>
If your Yi's are one-hot encoded, use categorical_crossentropy. Examples (for a 3-class classification): [1,0,0] , [0,1,0], [0,0,1]

But if your Yi
's are integers, use sparse_categorical_crossentropy. Examples for above 3-class classification problem: [1] , [2], [3]

In [129]:
# 6.0 Compile the model and use 'ytr' in fit():
model.compile(loss =  'categorical_crossentropy' ,  #'binary_crossentropy',
              optimizer = 'adam',
              metrics = ['acc']
              )

In [131]:
# 6.0.1 Compile the model but hen use 'y_train; in fit():
model.compile(loss =  'sparse_categorical_crossentropy',
              optimizer = 'adam',
              metrics = ['acc']
              )

In [None]:
# 7.0 If using categorical cross entropy or binary cross entropy:

model.fit(
           Xtr, ytr, 
           batch_size = 32,
           validation_split = 0.2,
           epochs = 50
          )

In [None]:
# 7.0.1 If using sparse categorical cross entropy:

model.fit(
           Xtr, y_train,
           batch_size = 32,
           validation_split = 0.2,
           epochs = 50
          )

In [None]:
################ Done #####################