In [1]:
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset

In [2]:
# Load the dataset using pandas
data = pd.read_csv('diabetes.csv')

In [3]:
data

Unnamed: 0,Number of times pregnant,Plasma glucose concentration,Diastolic blood pressure,Triceps skin fold thickness,2-Hour serum insulin,Body mass index,Age,Class
0,6,148,72,35,0,33.6,50,positive
1,1,85,66,29,0,26.6,31,negative
2,8,183,64,0,0,23.3,32,positive
3,1,89,66,23,94,28.1,21,negative
4,0,137,40,35,168,43.1,33,positive
...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,63,negative
764,2,122,70,27,0,36.8,27,negative
765,5,121,72,23,112,26.2,30,negative
766,1,126,60,0,0,30.1,47,positive


In [4]:
# For x: Extract out the dataset from all the rows (all samples) and all the columns except the last column (all features)
# For y: Extract out the last column
# Convert the extracted data into numpy arrays using the .values method
x = data.iloc[:, 0:-1].values
y_string = list(data.iloc[:, -1].values)


In [5]:
print(x.shape)
print(len(y_string))

(768, 7)
768


In [6]:
# Convert y from string labels to integer labels
y_int = []
for s in y_string:
    if s == 'positive':
        y_int.append(1)
    else:
        y_int.append(0)   

In [7]:
y_int

[1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,


In [8]:
# convert to an array
y = np.array(y_int, dtype=np.float64)

In [9]:
x

array([[  6. , 148. ,  72. , ...,   0. ,  33.6,  50. ],
       [  1. ,  85. ,  66. , ...,   0. ,  26.6,  31. ],
       [  8. , 183. ,  64. , ...,   0. ,  23.3,  32. ],
       ...,
       [  5. , 121. ,  72. , ..., 112. ,  26.2,  30. ],
       [  1. , 126. ,  60. , ...,   0. ,  30.1,  47. ],
       [  1. ,  93. ,  70. , ...,   0. ,  30.4,  23. ]])

In [10]:
# Feature Normalization. ALL features should have the same range of values (-1,1)
sc = StandardScaler()
x = sc.fit_transform(x)

In [11]:
# Convert the arrays to PyTorch tensors
x = torch.tensor(x)
y = torch.tensor(y).unsqueeze(1)

In [12]:
class Dataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y
    
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return len(self.x)

In [13]:
dataset = Dataset(x, y)

In [14]:
len(dataset)

768

In [15]:
# Load the dataset for batch processing and shuffling
train_loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=32, shuffle=True)
train_loader

<torch.utils.data.dataloader.DataLoader at 0x35a1a7230>

In [16]:
print ("There is {} batches in the dataset". format(len(train_loader)))
for (x,y) in train_loader:
    print("For one iteration (batch), there is: ")
    print("Data: {}". format (x. shape))
    print("Labels: {}".format(y.shape) ) 
    break

There is 24 batches in the dataset
For one iteration (batch), there is: 
Data: torch.Size([32, 7])
Labels: torch.Size([32, 1])


## **Building Neural Network**

$$H_{p}(q) = -\frac{1}{N} \sum_{i=1}^{N} y_i \log\big(p(y_i)\big) + (1-y_i)\log\big(1-p(y_i)\big)$$

$$\text{cost} = -\big( Y \cdot \log(\text{hypothesis}) + (1-Y)\cdot \log(1-\text{hypothesis}) \big).\text{mean}()$$

In [17]:
class Model(nn.Module):
    def __init__(self, input_features, output_features):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(input_features, 5)
        self.fc2 = nn.Linear(5, 4)
        self.fc3 = nn.Linear(4, 3)
        self.fc4 = nn.Linear(3, output_features)
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()

    def forward(self, x):
        out = self.fc1(x)
        out = self.tanh(out)
        out = self.fc2(out)
        out = self.tanh(out)
        out = self.fc3(out)
        out = self.tanh(out)
        out = self.fc4(out)
        out = self.sigmoid(out)
        return out

In [18]:
# Create the network 
net = Model(7,1)
# In Binary Cross Entropy: the input and output should be of the same shape
criterion = torch.nn.BCELoss(size_average = True)
# Learning rate of 0.1
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9)



In [19]:
# Traning the model
num_epochs = 200
for epoch in range (num_epochs):
    for inputs, labels in train_loader:
        inputs = inputs.float()
        labels = labels.float()
        # Forward Propagation
        outputs = net(inputs)
        outputs = net(inputs)
        # Loss Calculation
        loss = criterion(outputs, labels)
        # Clear the gradient buffer
        optimizer.zero_grad()
        # Calculate the gradients & Back Propagation
        loss.backward()
        # Update the weights
        optimizer.step()

    # Accuracy Calculation
    outputs = (outputs > 0.5).float()
    accuracy = (outputs == labels).float().mean()
    # Print Statistics
    print ('Epoch {}/{}, Loss: {:.3f}, Accuracy: {:.3f}'.format(epoch+1, num_epochs, loss, accuracy))

Epoch 1/200, Loss: 0.510, Accuracy: 0.750
Epoch 2/200, Loss: 0.469, Accuracy: 0.719
Epoch 3/200, Loss: 0.544, Accuracy: 0.688
Epoch 4/200, Loss: 0.613, Accuracy: 0.688
Epoch 5/200, Loss: 0.463, Accuracy: 0.750
Epoch 6/200, Loss: 0.415, Accuracy: 0.812
Epoch 7/200, Loss: 0.467, Accuracy: 0.719
Epoch 8/200, Loss: 0.539, Accuracy: 0.719
Epoch 9/200, Loss: 0.498, Accuracy: 0.750
Epoch 10/200, Loss: 0.439, Accuracy: 0.875
Epoch 11/200, Loss: 0.599, Accuracy: 0.750
Epoch 12/200, Loss: 0.487, Accuracy: 0.719
Epoch 13/200, Loss: 0.231, Accuracy: 0.938
Epoch 14/200, Loss: 0.532, Accuracy: 0.750
Epoch 15/200, Loss: 0.382, Accuracy: 0.844
Epoch 16/200, Loss: 0.386, Accuracy: 0.812
Epoch 17/200, Loss: 0.430, Accuracy: 0.812
Epoch 18/200, Loss: 0.365, Accuracy: 0.781
Epoch 19/200, Loss: 0.282, Accuracy: 0.906
Epoch 20/200, Loss: 0.479, Accuracy: 0.750
Epoch 21/200, Loss: 0.542, Accuracy: 0.688
Epoch 22/200, Loss: 0.475, Accuracy: 0.688
Epoch 23/200, Loss: 0.426, Accuracy: 0.812
Epoch 24/200, Loss: 