Create a simple Feed Forward network with PyTorch using Bank Customer Churn dataset....(Churn_Modelling.csv)

For any model following steps are needed: 

- Import - necessary libraries
- [Optional] Set the Device - as "msp" or "CUDA"
- [Optional] EDA Prepare the data for train and test 
    - Read the Dataset
    - Perform EDA and Cleanup
    - [Optional] Split the data and create X and y 
- Create Fully Connected Network - using PyTorch 
- Load Data - using dataset loader
- Initialize network - ready for training
- Loss and optimizer - define 
- Train the Network - to calculate weights
- Check accuracy on training & test dataset

Import - necessary libraries

In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F

EDA

In [2]:
file_path = "Churn_Modelling.csv"
df = pd.read_csv(file_path)
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
df.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1,inplace=True)
df.replace({'Female': 1, 'Male': 0}, inplace=True)
df = pd.get_dummies(data=df, columns=["Geography"], dtype='int64')
X = df.drop(["Exited"], axis=1).to_numpy(dtype='float32')
y = df["Exited"].to_numpy(dtype='float32')
sc = StandardScaler()
X_scaled = sc.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.25, random_state=1)
X_train = torch.from_numpy(X_train)
X_test  = torch.from_numpy(X_test)
y_train = torch.from_numpy(y_train)
y_train = y_train.reshape(len(y_train), 1)
y_test  = torch.from_numpy(y_test)
y_test  = y_test.reshape(len(y_test), 1)

Define Network

In [4]:
class NN(nn.Module):
    def __init__(self, input_size, num_classes):
        super(NN, self).__init__()
        self.fc1 = nn.Linear(input_size, 6)
        self.fc2 = nn.Linear(6, num_classes)
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.sigmoid(self.fc2(x))
        return x

Set Hyperparameters

In [5]:
input_size = 12
num_classes = 1
learning_rate = 0.1
num_epochs = 500
print_offset = 10

device = 'mps' if torch.backends.mps.is_available() else 'cuda' if torch.cuda.is_available() else 'cpu'

Initialize the model, Loss Function and Optimizer

In [6]:
model = NN(input_size, num_classes).to(device)
print(f"Model Architecture: \n {model}")
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

Model Architecture: 
 NN(
  (fc1): Linear(in_features=12, out_features=6, bias=True)
  (fc2): Linear(in_features=6, out_features=1, bias=True)
)


Train the Model

- Run A) and B) - Separate Train and Validation
- Run C) - Train and Validation Together

In [7]:
# A) Train the model First and Then do Validation in next Section
X_train = X_train.to(device=device)
y_train = y_train.to(device=device)

for epoch in range(1, num_epochs+1):

    # Forward - 
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)
    
    # Print loss and accuracy on evry epoch
    if (epoch % print_offset == 0) or (epoch == num_epochs):
        print(f"epoch= {epoch}, train loss= {round(loss.item(), 4)}") # Round the loss upto 4 decimel
    
    # Backward - 
    optimizer.zero_grad()
    loss.backward()

    # Gradient desent with Adam optimizer
    optimizer.step()

epoch= 10, train loss= 0.4685
epoch= 20, train loss= 0.4178
epoch= 30, train loss= 0.3972
epoch= 40, train loss= 0.3649
epoch= 50, train loss= 0.3515
epoch= 60, train loss= 0.3477
epoch= 70, train loss= 0.3452
epoch= 80, train loss= 0.3437
epoch= 90, train loss= 0.3428
epoch= 100, train loss= 0.342
epoch= 110, train loss= 0.3412
epoch= 120, train loss= 0.3405
epoch= 130, train loss= 0.3393
epoch= 140, train loss= 0.3376
epoch= 150, train loss= 0.3365
epoch= 160, train loss= 0.3358
epoch= 170, train loss= 0.3354
epoch= 180, train loss= 0.3352
epoch= 190, train loss= 0.3352
epoch= 200, train loss= 0.3348
epoch= 210, train loss= 0.3347
epoch= 220, train loss= 0.3349
epoch= 230, train loss= 0.3347
epoch= 240, train loss= 0.3348
epoch= 250, train loss= 0.3348
epoch= 260, train loss= 0.3347
epoch= 270, train loss= 0.3348
epoch= 280, train loss= 0.3346
epoch= 290, train loss= 0.3348
epoch= 300, train loss= 0.3347
epoch= 310, train loss= 0.3349
epoch= 320, train loss= 0.3348
epoch= 330, train 

In [8]:
# B) Test Validation after training in previous section
def check_accuracy(X, y, model):
    corr_pred  = 0
    num_sample = len(X)
    model.eval() # Set the model to eveluate mode

    with torch.no_grad():
        X = X.to(device=device)
        y = y.to(device=device)

        model_pred = model(X)
        y_pred = model_pred.round()
        corr_pred += (y_pred == y).sum()
    
    model.train() # Set the model back to traning mode
    return corr_pred/num_sample

print(f"Accuracy on training set: {check_accuracy(X_train, y_train, model)*100:.2f}")
print(f"Accuracy on test set: {check_accuracy(X_test, y_test, model)*100:.2f}")

Accuracy on training set: 86.13
Accuracy on test set: 86.36


In [9]:
# C) Train the model and find test accuracy together
X_train = X_train.to(device=device)
y_train = y_train.to(device=device)
X_test = X_test.to(device=device)
y_test = y_test.to(device=device)

for epoch in range (1, num_epochs+1):
    
    # Forward:
    y_train_pred = model(X_train)
    loss = criterion(y_train_pred, y_train)

    # Check model accuracy on Train dataset
    y_train_pred_class = y_train_pred.round()
    accuracy_train = y_train_pred_class.eq(y_train).sum()/len(y_train)
    
    # Backward
    optimizer.zero_grad()
    loss.backward()

    # Gradient descent with Adam optimizer
    optimizer.step()
    
    # Check the model accuracy on test dataset
    with torch.no_grad():
        y_test_pred = model(X_test)
        y_test_pred_class = y_test_pred.round()
        accuracy_test = y_test_pred_class.eq(y_test).sum()/len(y_test)

    if (epoch % print_offset == 0) or (epoch == num_epochs):
        print(f"epoch= {epoch}/{num_epochs}, loss= {round(loss.item(), 4)}, train accuracy = {round(accuracy_train.item(), 4)}, test accuracy = {round(accuracy_test.item(), 4)}") # Round the loss upto 4 decimel

epoch= 10/500, loss= 0.3349, train accuracy = 0.8623, test accuracy = 0.8628
epoch= 20/500, loss= 0.3346, train accuracy = 0.8608, test accuracy = 0.8628
epoch= 30/500, loss= 0.3349, train accuracy = 0.8604, test accuracy = 0.8628
epoch= 40/500, loss= 0.3346, train accuracy = 0.8619, test accuracy = 0.8628
epoch= 50/500, loss= 0.3351, train accuracy = 0.862, test accuracy = 0.864
epoch= 60/500, loss= 0.3346, train accuracy = 0.8607, test accuracy = 0.862
epoch= 70/500, loss= 0.3347, train accuracy = 0.8608, test accuracy = 0.8628
epoch= 80/500, loss= 0.3347, train accuracy = 0.8612, test accuracy = 0.864
epoch= 90/500, loss= 0.3349, train accuracy = 0.8623, test accuracy = 0.8632
epoch= 100/500, loss= 0.3346, train accuracy = 0.8609, test accuracy = 0.8624
epoch= 110/500, loss= 0.335, train accuracy = 0.8597, test accuracy = 0.8636
epoch= 120/500, loss= 0.3346, train accuracy = 0.8608, test accuracy = 0.8628
epoch= 130/500, loss= 0.3349, train accuracy = 0.8617, test accuracy = 0.8648
