# Introduction
Use PyTorch to develop a binary classifier for the Diabetes dataset

In [1]:
# Import Standard Libraries
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from sklearn.preprocessing import StandardScaler

# Read Data

In [2]:
# Read data
data = pd.read_csv('./../../data/diabetes.csv')

In [3]:
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
# Define Features & Label as arrays
x = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Data Preprocessing

In [5]:
# Data Normalisation
standard_scaler = StandardScaler()

# Fit and transform X
x = standard_scaler.fit_transform(x)

In [6]:
# Convert to PyTorch tensor
x = torch.tensor(x)
y = torch.tensor(y).unsqueeze(1) # Transform the shape from (n) to (n, 1)

In [7]:
# Define the Dataset for manipulating the data in PyTorch
class Dataset(Dataset):
    
    def __init__(self, x, y):
        
        self.x = x
        self.y = y
        
    def __getitem__(self, index):
        
        return self.x[index], self.y[index]
    
    def __len__(self):
        
        return len(self.x)

In [8]:
# Create the Dataset object
dataset = Dataset(x, y)

In [9]:
# Define the DataLoader for loading the data into the Neural Network in batches
train_data_loader = DataLoader(dataset,
                               batch_size=32,
                               shuffle=True)

# Model Definition

## Define the Neural Network

In [10]:
class Model(nn.Module):
    
    def __init__(self, input_features, output_features):
        
        """
        Initialise the Neural Network
        
        Args:
            input_features Integer number of input features
            output_features Integer number of output features
            
        Return:
        """
        
        # Call the nn.Module constructor
        super(Model, self).__init__()
        
        # Define the layers
        self.fully_connected_layer_1 = nn.Linear(in_features=input_features, 
                                                 out_features= 5)
        
        self.fully_connected_layer_2 = nn.Linear(in_features=5, 
                                                 out_features= 4)
        
        self.fully_connected_layer_3 = nn.Linear(in_features=4, 
                                                 out_features= 3)
        
        self.fully_connected_layer_4 = nn.Linear(in_features=3, 
                                                 out_features= output_features)
        
        # Define the activation functions for the hidden layers (Tanh) and the output layers (Sigmoid)
        self.hidden_layers_activation_function = nn.Tanh()
        self.output_activation_function = nn.Sigmoid()
        
        
    def forward(self, x):
        
        """
        Feed Forward process
        
        Args:
            x Array input
            
        Return:
            output Array output
        """
        
        # Feed forward the input x through the whole neural network
        # Steps: 1. Ingest the input into the layer; 2. Apply the activation function
        output_1 = self.fully_connected_layer_1(x)
        output_1 = self.hidden_layers_activation_function(output_1)
        
        output_2 = self.fully_connected_layer_2(output_1)
        output_2 = self.hidden_layers_activation_function(output_2)
        
        
        output_3 = self.fully_connected_layer_3(output_2)
        output_3 = self.hidden_layers_activation_function(output_3)
        
        
        output = self.fully_connected_layer_4(output_3)
        output = self.output_activation_function(output)
        
        return output

## Instantiate the Neural Network, Loss and Optimizer

In [11]:
# Instance the Neural Network
neural_network = Model(8, 1)

In [12]:
# Define the Binary Cross-Entropy Loss
bce = nn.BCELoss(reduction='mean')

In [13]:
# Define the SGD Optimisation function
# NOTE: It requires the weights from the NN (neural_network.parameters() and the learning rate
sgd = torch.optim.SGD(neural_network.parameters(), 
                      lr=0.1, 
                      momentum=0.9)

## Training

In [14]:
# Hyperparameters
epochs = 200

In [15]:
for epoch in range(epochs):
    
    # Use the DataLoader batch_size
    for input_data, labels in train_data_loader:
        
        # Convert to float
        input_data = input_data.float()
        labels = labels.float()
        
        # Feed the input_data into the neural network automatically calling the 'forward' function
        output = neural_network(input_data)
        
        # Compute the loss
        loss = bce(output, labels)
        
        # Clear the gradient buffer
        sgd.zero_grad()
        
        # Calculate the gradients
        loss.backward()
        
        # Update the weights
        sgd.step()
        
    # Compute the epoch accuracy for all the batches
    # NOTE: Retrieve only output values bigger than 0.5
    output = (output > 0.5).float()
    accuracy = (output == labels).float().mean()
        
    print("Epoch {}/{} - Loss: {:.3f} - Accuracy: {:.3f}".format(epoch + 1, epochs, loss, accuracy))

Epoch 1/200 - Loss: 0.506 - Accuracy: 0.812
Epoch 2/200 - Loss: 0.586 - Accuracy: 0.656
Epoch 3/200 - Loss: 0.525 - Accuracy: 0.719
Epoch 4/200 - Loss: 0.459 - Accuracy: 0.750
Epoch 5/200 - Loss: 0.431 - Accuracy: 0.750
Epoch 6/200 - Loss: 0.424 - Accuracy: 0.844
Epoch 7/200 - Loss: 0.555 - Accuracy: 0.750
Epoch 8/200 - Loss: 0.591 - Accuracy: 0.688
Epoch 9/200 - Loss: 0.347 - Accuracy: 0.844
Epoch 10/200 - Loss: 0.543 - Accuracy: 0.750
Epoch 11/200 - Loss: 0.410 - Accuracy: 0.750
Epoch 12/200 - Loss: 0.395 - Accuracy: 0.844
Epoch 13/200 - Loss: 0.427 - Accuracy: 0.844
Epoch 14/200 - Loss: 0.642 - Accuracy: 0.688
Epoch 15/200 - Loss: 0.506 - Accuracy: 0.750
Epoch 16/200 - Loss: 0.460 - Accuracy: 0.750
Epoch 17/200 - Loss: 0.411 - Accuracy: 0.812
Epoch 18/200 - Loss: 0.464 - Accuracy: 0.812
Epoch 19/200 - Loss: 0.505 - Accuracy: 0.719
Epoch 20/200 - Loss: 0.579 - Accuracy: 0.656
Epoch 21/200 - Loss: 0.414 - Accuracy: 0.812
Epoch 22/200 - Loss: 0.459 - Accuracy: 0.750
Epoch 23/200 - Loss