In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [2]:
# Loading the diabetes classification dataset
data = pd.read_csv('diabetes.csv')
data.head()

Unnamed: 0,Number of times pregnant,Plasma glucose concentration,Diastolic blood pressure,Triceps skin fold thickness,2-Hour serum insulin,Body mass index,Age,Class
0,6,148,72,35,0,33.6,50,positive
1,1,85,66,29,0,26.6,31,negative
2,8,183,64,0,0,23.3,32,positive
3,1,89,66,23,94,28.1,21,negative
4,0,137,40,35,168,43.1,33,positive


In [11]:
# Feature Extraction and Processing

# Function to convert label to 1s and 0s
def convert_label(label):
    if label == 'positive':
        return 1
    else:
        return 0


X = data.iloc[:,:-1].values
sc = StandardScaler()
X = sc.fit_transform(X)
X = torch.tensor(X)


y_string= list(data.iloc[:,-1])
y_int = []

# Coverting the labels to 0s and 1s
for string in y_string:
    if string == 'positive':
        y_int.append(1)
    else:
        y_int.append(0)

Y = np.array(y_int, dtype = 'float64')   
#Y = data.iloc[:,-1].apply(convert_label).values.astype('float64')
Y = torch.tensor(Y).unsqueeze(1)

#print(X)
#print(Y)

In [12]:
class Dataset(Dataset):
    def __init__(self, x, y):
        self.X = x
        self.Y = y
        
    def __getitem__(self, index):
        return self.X[index], self.Y[index]
    
    def __len__(self):
        return len(self.X)
    
class Model(nn.Module):
    def __init__(self, input_features, output_features=1):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(input_features, 5)
        self.fc2 = nn.Linear(5, 4)
        self.fc3 = nn.Linear(4, 3)
        self.fc4 = nn.Linear(3, output_features)
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()

    def forward(self, x):
        out = self.fc1(x)
        out = self.tanh(out)
        out = self.fc2(out)
        out = self.tanh(out)
        out = self.fc3(out)
        out = self.tanh(out)
        out = self.fc4(out)
        out = self.sigmoid(out)
        return out

In [13]:
# Load the data using DataLoader for batch processing and shuffling
dataset = Dataset(X, Y)
train_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)

print("There are {} batches in the dataset:".format(len(train_loader)))
for (x,y) in train_loader:
    print("For one iteration, there are:")
    print("Data: {}".format(x.shape))
    print("labels: {}".format(y.shape))
    break

There are 24 batches in the dataset:
For one iteration, there are:
Data: torch.Size([32, 7])
labels: torch.Size([32, 1])


In [14]:
model = Model(X.shape[1], 1)

# Define the loss function and the optimizer
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.8)

In [15]:
# Train the network

epochs = 200

for epoch in range(epochs):
    for inputs, labels in train_loader:
        inputs = inputs.float()
        labels = labels.float()
        
        # Forward propagation
        output = model(inputs) # or can do model.forward()
        loss = criterion(output, labels) # Loss Calculation
        optimizer.zero_grad # Clear the Gradient Buffer
        
        loss.backward() # performing backpropagation and calculating the gradients
        optimizer.step() # Updating the weights
    
    # Accuracy Calculation
    output = (output>0.5).float()
    accuracy = (output == labels).float().mean()
    print("Epoch {}/{}, Loss = {:.3f}, Accuracy = {:.3f}".format(epoch+1,epochs,loss,accuracy))

Epoch 1/200, Loss = 1.548, Accuracy = 0.625
Epoch 2/200, Loss = 19.481, Accuracy = 0.781
Epoch 3/200, Loss = 30.486, Accuracy = 0.344
Epoch 4/200, Loss = 40.625, Accuracy = 0.594
Epoch 5/200, Loss = 25.000, Accuracy = 0.750
Epoch 6/200, Loss = 37.500, Accuracy = 0.625
Epoch 7/200, Loss = 15.625, Accuracy = 0.844
Epoch 8/200, Loss = 18.750, Accuracy = 0.812
Epoch 9/200, Loss = 18.750, Accuracy = 0.812
Epoch 10/200, Loss = 18.750, Accuracy = 0.812
Epoch 11/200, Loss = 25.000, Accuracy = 0.750
Epoch 12/200, Loss = 25.000, Accuracy = 0.750
Epoch 13/200, Loss = 31.250, Accuracy = 0.688
Epoch 14/200, Loss = 28.125, Accuracy = 0.719
Epoch 15/200, Loss = 40.625, Accuracy = 0.594
Epoch 16/200, Loss = 25.000, Accuracy = 0.750
Epoch 17/200, Loss = 18.750, Accuracy = 0.812
Epoch 18/200, Loss = 34.375, Accuracy = 0.656
Epoch 19/200, Loss = 37.500, Accuracy = 0.625
Epoch 20/200, Loss = 40.625, Accuracy = 0.594
Epoch 21/200, Loss = 25.000, Accuracy = 0.750
Epoch 22/200, Loss = 25.000, Accuracy = 0.75

Epoch 186/200, Loss = 25.000, Accuracy = 0.750
Epoch 187/200, Loss = 21.875, Accuracy = 0.781
Epoch 188/200, Loss = 28.125, Accuracy = 0.719
Epoch 189/200, Loss = 21.875, Accuracy = 0.781
Epoch 190/200, Loss = 34.375, Accuracy = 0.656
Epoch 191/200, Loss = 34.375, Accuracy = 0.656
Epoch 192/200, Loss = 28.125, Accuracy = 0.719
Epoch 193/200, Loss = 28.125, Accuracy = 0.719
Epoch 194/200, Loss = 18.750, Accuracy = 0.812
Epoch 195/200, Loss = 31.250, Accuracy = 0.688
Epoch 196/200, Loss = 25.000, Accuracy = 0.750
Epoch 197/200, Loss = 31.250, Accuracy = 0.688
Epoch 198/200, Loss = 28.125, Accuracy = 0.719
Epoch 199/200, Loss = 34.375, Accuracy = 0.656
Epoch 200/200, Loss = 21.875, Accuracy = 0.781
