In [1]:
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset 
from torch.utils.data import DataLoader
from typing import List, Tuple, Dict

In [14]:
#read dataset
data = pd.read_csv('/nfs/site/home/abalusan/wa_1/courses/ailc_dl/5.diabetes.csv')

In [3]:
#Pre-process data.
#1) Extract i/p(x - all columns except the last) and o/p(y-last column)
# all rows and columns from 0th to the last but one column. And .values converts it into a numpy array
x_np = data.iloc[:,0:-1].values
y_string = list(data.iloc[:,-1])

In [4]:
#Convert negative/positive strings to 0/1 booleans. 
y_int = list(map(lambda x: 0 if x == 'negative' else 1, y_string))
#Convert y_int to numpy array or torch tensor as list are inefficient when it comes to matrix operations. 
# Ensure dtype of y is same as the dtype of all features in x.
y_np = np.array(y_int, dtype='float64')

In [5]:
#Normalize the features - Improves training time. 
#Larger values will have more impact on optimization direction and lower values will be neglected. Resulting in the exploding gradients problem.
#Another issue would be, due to larger gradient of a feat., it takes larger steps and reaches the optimal weight corresponding to it sooner than others while the other low valued -
#-features converge slowly. 
#Both these issues increase training time and can be avoided w/ normalization

#We perform feature nomralization w/ standardization technique x_new = (x - mean)/std

sc = StandardScaler()
x_np = sc.fit_transform(x_np) #fit - calculates the mean and std, transform - applies the transformation based on the mean & std. This function does both.

In [6]:
#Convert numpy arrays to pytorch tensors to work w/ pytorch library
#TODO: Explore different way to converting numpy array to a tensor and their implications
#If we do not unsqueeze the y tensor, it will have a single dimension and it would not be compatible w/ BCE loss. Unsqueeze adds converts y from [768] to [768,1] shape.
x = torch.tensor(x_np)
y = torch.tensor(y_np).unsqueeze(1)

In [7]:
#Inherit from the dataset class imported from torch.utils.data. If A(B), A inherits from B.
class Dataset(Dataset):

    def __init__(self: Dataset, x: torch.FloatTensor, y: torch.FloatTensor) -> None:
        self.x = x
        self.y = y

    #Overriding this function from the parent Dataset class to work for our custom dataset
    def __getitem__(self: Dataset, index: int) -> Tuple[torch.FloatTensor, torch.FloatTensor]:
        return self.x[index], self.y[index]
    
    #Override the function to return the number of rows
    def __len__(self: Dataset) -> int:
        return len(self.x)

In [8]:
#Create an instance of the function
dataset = Dataset(x, y)
len(dataset)

768

In [9]:
#TODO: Explore the shuffle option
#Creating a generator to fetch 'batch_size' samples from the dataset.
train_loader = DataLoader(dataset=dataset,
           batch_size=32,
           shuffle = True)

#print("There is {} batches in the dataset".format(len(train_loader)))

In [10]:
##NN arch: 7 i/p neurons (each neuron per feat) and 1 o/p neuron (sigmoid) (1 neuron per 1 label prediction). 
## 3 hidden layers 4,4,3.
## The i/p layer has 3 names: fully connected layer, MLP (multi-layer perceptron), linear layer.

#Inherit from the nn.module class
#super() - advantage is that we can extend and modify the behaviour of the parent class method in subclass w/o completely replacing it.
#1) Suppose we want to call the parent class init method, instead of explicitly calling the Parent class method, we can just use super which makes the code more maintainable.
#2) In multiple inheritance case, the super takes care of the correct method resolution order (MRO) and ensures that the parent class's method is only called once.
#3) do_something() is a parent method. We can extend this in sub class like "super().do_something(); <additional lines to extend functionality>"
class Model(nn.Module):
    def __init__(self, input_features: int, output_features: int) -> None:
        super(Model, self).__init__()
        #NN architecture
        #Hidden layers
        self.fc1 = nn.Linear(input_features,5)
        self.fc2 = nn.Linear(5,4)
        self.fc3 = nn.Linear(4,3)
        self.fc4 = nn.Linear(3,output_features)
        #Sigmoid activation for the output layer
        self.sigmoid = nn.Sigmoid()
        #Tanh activation for hidden layers
        self.tanh = nn.Tanh()

    #Forward prop defined and pytorch figures out the backprop & does it automagically. TODO: Custom backprop possible. Why and when would you use it?
    def forward(self, x: torch.FloatTensor) -> torch.FloatTensor:
        out = self.fc1(x)
        out = self.tanh(out)
        out = self.fc2(out)
        out = self.tanh(out)
        out = self.fc3(out)
        out = self.tanh(out)
        out = self.fc4(out)
        out = self.sigmoid(out)
        return out


In [11]:
#Object of our class
net = Model(7,1)

#BCE loss as this is a binary classification task
#size_average=True says that the losses are averaged over the batch. If not, we get loss per sample. 
criterion = loss = torch.nn.BCELoss(reduction='mean')

#optimizer
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum= 0.9)

In [12]:
#Train the neural network
#Hyper-parameters
epochs = 200

#In pytorch we need to use the for loop to train, in tensorflow/keras the fit function automatically does it.
for epoch in range(epochs):
    #The for loop below runs one epoch
    for inputs, labels in train_loader:
        #Already did this, but make this a habit to ensure labels and features are in the same dtype format
        inputs = inputs.float()
        labels = labels.float() 

        #Forward prop. Here, if regular python, we need to explicitly call the forward method in the comment below. But pytorch does this automatically when we pass input to the net object
        outputs = net(inputs)
        #net.forward()
        #Loss calculation
        loss = criterion(outputs, labels)

        #Clear the gradient buffer to ensure that we dont accumulate the gradient. In pytorch, this must be done manually. 
        optimizer.zero_grad() # sets all gradients to 0
        loss.backward() # Perform backprop and calculate gradients
        optimizer.step() # Applies the weight update rule once

    #Accuracy calculation. (tensor <condition> tensor) Every element is compared with the condition and return 1/0 for True/False per that condition 
    output = (outputs>0.5).float() #threshold the output of sigmoid layer
    accuracy = (output == labels).float().mean()
    
    #Print statistics. Epoch+1 as in python epoch starts from 0.
    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.3f}, Accuracy: {accuracy:3f}")

Epoch 1/200, Loss: 0.647, Accuracy: 0.656250
Epoch 2/200, Loss: 0.636, Accuracy: 0.593750
Epoch 3/200, Loss: 0.663, Accuracy: 0.625000
Epoch 4/200, Loss: 0.435, Accuracy: 0.812500
Epoch 5/200, Loss: 0.637, Accuracy: 0.625000
Epoch 6/200, Loss: 0.575, Accuracy: 0.718750
Epoch 7/200, Loss: 0.630, Accuracy: 0.656250
Epoch 8/200, Loss: 0.583, Accuracy: 0.687500
Epoch 9/200, Loss: 0.387, Accuracy: 0.843750
Epoch 10/200, Loss: 0.629, Accuracy: 0.750000
Epoch 11/200, Loss: 0.459, Accuracy: 0.812500
Epoch 12/200, Loss: 0.387, Accuracy: 0.875000
Epoch 13/200, Loss: 0.514, Accuracy: 0.750000
Epoch 14/200, Loss: 0.608, Accuracy: 0.781250
Epoch 15/200, Loss: 0.509, Accuracy: 0.718750
Epoch 16/200, Loss: 0.588, Accuracy: 0.718750
Epoch 17/200, Loss: 0.355, Accuracy: 0.875000
Epoch 18/200, Loss: 0.403, Accuracy: 0.781250
Epoch 19/200, Loss: 0.515, Accuracy: 0.781250
Epoch 20/200, Loss: 0.500, Accuracy: 0.843750
Epoch 21/200, Loss: 0.439, Accuracy: 0.750000
Epoch 22/200, Loss: 0.318, Accuracy: 0.9375

In [13]:
#Fluctuating accuracy - how to fix it?
# Train, test, val split not performed (So no inference)