In [173]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn

from sklearn.preprocessing import StandardScaler

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

##### **Data Preprocessing**

In [174]:
# Load the dataset
data = pd.read_csv('diabetes.csv')

In [175]:
data

Unnamed: 0,Number of times pregnant,Plasma glucose concentration,Diastolic blood pressure,Triceps skin fold thickness,2-Hour serum insulin,Body mass index,Age,Class
0,6,148,72,35,0,33.6,50,positive
1,1,85,66,29,0,26.6,31,negative
2,8,183,64,0,0,23.3,32,positive
3,1,89,66,23,94,28.1,21,negative
4,0,137,40,35,168,43.1,33,positive
...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,63,negative
764,2,122,70,27,0,36.8,27,negative
765,5,121,72,23,112,26.2,30,negative
766,1,126,60,0,0,30.1,47,positive


In [176]:
# For x, extract out all columns except last column, and convert to numpy using the .values method.
x = data.iloc[:, :-1].values

# For y, extract out the last column
y_string = list(data.iloc[:, -1])
# Our neural network only understand numbers. So convert the string to labels.
y_int = []
for string in y_string:
    if string == 'positive':
        y_int.append(1)
    else:
        y_int.append(0)
# Convert to an array.
y = np.array(y_int, dtype='float64')

##### **Data Normalization**

In [177]:
x

array([[  6. , 148. ,  72. , ...,   0. ,  33.6,  50. ],
       [  1. ,  85. ,  66. , ...,   0. ,  26.6,  31. ],
       [  8. , 183. ,  64. , ...,   0. ,  23.3,  32. ],
       ...,
       [  5. , 121. ,  72. , ..., 112. ,  26.2,  30. ],
       [  1. , 126. ,  60. , ...,   0. ,  30.1,  47. ],
       [  1. ,  93. ,  70. , ...,   0. ,  30.4,  23. ]])

Feature Normalization

- standardization
$x' = \frac{x-\bar x}{\sigma}$

In [178]:
# All features should have the same range of values (-1, 1)
sc = StandardScaler()
x = sc.fit_transform(x)

In [179]:
x

array([[ 0.63994726,  0.84832379,  0.14964075, ..., -0.69289057,
         0.20401277,  1.4259954 ],
       [-0.84488505, -1.12339636, -0.16054575, ..., -0.69289057,
        -0.68442195, -0.19067191],
       [ 1.23388019,  1.94372388, -0.26394125, ..., -0.69289057,
        -1.10325546, -0.10558415],
       ...,
       [ 0.3429808 ,  0.00330087,  0.14964075, ...,  0.27959377,
        -0.73518964, -0.27575966],
       [-0.84488505,  0.1597866 , -0.47073225, ..., -0.69289057,
        -0.24020459,  1.17073215],
       [-0.84488505, -0.8730192 ,  0.04624525, ..., -0.69289057,
        -0.20212881, -0.87137393]])

In [180]:
# Convert to a PyTorch tensor
x = torch.tensor(x)
y = torch.tensor(y)

print(x.shape, y.shape)

torch.Size([768, 7]) torch.Size([768])


In [181]:
# Since we're going to use the binary cross entropy, y needs to be a two dimensional matrix.
y = y.unsqueeze(1)
print(y.shape)

torch.Size([768, 1])


##### **Creating and Loading the Dataset**

The **Dataset class** is used to implement a DataLoader.
A **DataLoader** is essential in deep learning training for the following reasons:
1. **Batch-wise Data Loading**: It automatically divides the dataset into smaller, manageable batches, making training more efficient.
2. **Shuffling**: It randomizes the order of data to prevent the model from learning dependencies based on sequence.

The code below defines a custom `Dataset` class. The class inherits from a base `Dataset` class and initializes with input data `x` and labels `y`. 
- Inheriting from a base `Dataset` class ensures compatibility with deep learning frameworks, leverages reusable functionalities, enhances code consistency and readability, supports polymorphism, and improves maintainability and scalability.

In [182]:
class Dataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y
    
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return len(self.x)

In [183]:
dataset = Dataset(x, y)

In [184]:
len(dataset)

768

In [185]:
# Load the data to your dataloader for batch processing and shuffling
train_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)

print("There is {} batches in the dataset.".format(len(train_loader)))
for (x, y) in train_loader:
    print("For one iteration (batch), there is: ")
    print("Data:    {}".format(x.shape))
    print("Labels:  {}".format(y.shape))
    break

There is 24 batches in the dataset.
For one iteration (batch), there is: 
Data:    torch.Size([32, 7])
Labels:  torch.Size([32, 1])


In [186]:
768 / 32 == 24

True

##### **Building the Network**

In [187]:
class Model(nn.Module):
    def __init__(self, input_features, output_features):
        super(Model, self).__init__()
        # 7 input neurons - 5 - 4 - 3 - 1 output neuron
        self.fc1 = nn.Linear(input_features, 5)
        self.fc2 = nn.Linear(5, 4)
        self.fc3 = nn.Linear(4, 3)
        self.fc4 = nn.Linear(3, output_features)
        # two type of activation functions
        # For the hidden layers, tanh.
        # For the output, sigmoid, as we should have our output between 0 and one to use binary cross entropy.
        self.tanh = nn.Tanh()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.fc1(x)
        out = self.tanh(out)
        out = self.fc2(out)
        out = self.tanh(out)
        out = self.fc3(out)
        out = self.tanh(out)
        out = self.fc4(out)
        out = self.sigmoid(out)
        return out
    
    # We don't need to code back propagation function because Pytorch will automatically do it for us.
    # It just follows the opposite order of the forward propagation function.

In [188]:
net = Model(7, 1)

Binary Cross Entropy

In [189]:
criterion = nn.BCELoss(reduction='mean')

Stochastic Gradient Descent with momentum

In [190]:
# Use SGD with momentum with a learning rate of 0.1
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9) # weights

##### **Training the Network**

In [191]:
epochs = 200

In [192]:
for epoch in range(epochs):
    for inputs, labels in train_loader:
        # both of them are float tensors, but we just want to be safe
        inputs = inputs.float()
        labels = labels.float()
        
        # Forward Propagation: Pass inputs through the model to compute predictions.
        # No need to specify .forward(), just pass inputs to the net object argument. Pytorch will automatically call the forward function.
        outputs = net(inputs)
        
        # Loss Calculation: Compute the loss between predicted values and actual labels.
        loss = criterion(outputs, labels)
        
        # Clear the Gradient Buffer: Reset gradients to prevent accumulation from previous steps.
        # Weight Update Rule: replace weight with (weight - learning rate * gradient)
        optimizer.zero_grad()
        
        # Back Propagation: Will have a matrix of gradients
        loss.backward()
        
        # Update Weights
        optimizer.step()
        
    # Accuracy Calculation
    output = (outputs > 0.5).float()
    accuracy = (output == labels).float().mean()
    
    # Print Statistics
    print("Epoch: {}/{}, Loss: {:.3f}, Accuracy: {:.3f}".format(epoch+1, epochs, loss, accuracy))

Epoch: 1/200, Loss: 0.643, Accuracy: 0.656
Epoch: 2/200, Loss: 0.557, Accuracy: 0.719
Epoch: 3/200, Loss: 0.375, Accuracy: 0.906
Epoch: 4/200, Loss: 0.682, Accuracy: 0.625
Epoch: 5/200, Loss: 0.565, Accuracy: 0.750
Epoch: 6/200, Loss: 0.453, Accuracy: 0.875
Epoch: 7/200, Loss: 0.517, Accuracy: 0.719
Epoch: 8/200, Loss: 0.577, Accuracy: 0.688
Epoch: 9/200, Loss: 0.622, Accuracy: 0.688
Epoch: 10/200, Loss: 0.567, Accuracy: 0.688
Epoch: 11/200, Loss: 0.542, Accuracy: 0.656
Epoch: 12/200, Loss: 0.433, Accuracy: 0.875
Epoch: 13/200, Loss: 0.513, Accuracy: 0.719
Epoch: 14/200, Loss: 0.436, Accuracy: 0.781
Epoch: 15/200, Loss: 0.442, Accuracy: 0.844
Epoch: 16/200, Loss: 0.551, Accuracy: 0.719
Epoch: 17/200, Loss: 0.571, Accuracy: 0.688
Epoch: 18/200, Loss: 0.604, Accuracy: 0.688
Epoch: 19/200, Loss: 0.548, Accuracy: 0.719
Epoch: 20/200, Loss: 0.633, Accuracy: 0.656
Epoch: 21/200, Loss: 0.457, Accuracy: 0.719
Epoch: 22/200, Loss: 0.422, Accuracy: 0.781
Epoch: 23/200, Loss: 0.615, Accuracy: 0.7