In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset

In [2]:
# Load dataset
data = pd.read_csv('diabetes.csv', )
data.head()

Unnamed: 0,Number of times pregnant,Plasma glucose concentration,Diastolic blood pressure,Triceps skin fold thickness,2-Hour serum insulin,Body mass index,Age,Class
0,6,148,72,35,0,33.6,50,positive
1,1,85,66,29,0,26.6,31,negative
2,8,183,64,0,0,23.3,32,positive
3,1,89,66,23,94,28.1,21,negative
4,0,137,40,35,168,43.1,33,positive


In [3]:
data.shape

(768, 8)

In [4]:
data.isnull().sum()

Number of times pregnant        0
Plasma glucose concentration    0
Diastolic blood pressure        0
Triceps skin fold thickness     0
2-Hour serum insulin            0
Body mass index                 0
Age                             0
Class                           0
dtype: int64

In [5]:
# Data is clean. lets create our features and label
# Features
x = data.iloc[:, 0:-1].values # to convert into numpy array
y_string = list(data.iloc[:, -1])

In [6]:
print(x[:3])

[[  6.  148.   72.   35.    0.   33.6  50. ]
 [  1.   85.   66.   29.    0.   26.6  31. ]
 [  8.  183.   64.    0.    0.   23.3  32. ]]


In [7]:
print(y_string[:3])

['positive', 'negative', 'positive']


In [8]:
# Convert strings to integers
y_int = []
for string in y_string:
    if string == 'positive':
        y_int.append(1)
    else:
        y_int.append(0)

In [9]:
# Converting to numpy array with float datatype
y = np.array(y_int, dtype='float64')

In [10]:
# Normalizing features on to the scale of [0,1]
sc = StandardScaler()
x = sc.fit_transform(x)

In [11]:
# Converting normalized feature inputs into PyTorch's tensors
x = torch.tensor(x)
# We have to match the features dimension for label as well, hence make 1D into a 2D tensor
y = torch.tensor(y).unsqueeze(1)

In [12]:
print(x.shape, y.shape)

torch.Size([768, 7]) torch.Size([768, 1])


In [13]:
class Dataset(Dataset):
    
    def __init__(self, x, y):
        self.x = x
        self.y = y
    
    def __getitem__(self, index):
        """ Returns the item matching the index passed as argument"""
        return self.x[index], self.y[index]
    
    def __len__(self):
        return len(self.x)

In [14]:
dataset = Dataset(x,y)
len(dataset)

768

In [15]:
# Load the data into dataloader to preform batch processing and shuffling
train_loader = torch.utils.data.DataLoader(dataset = dataset, batch_size=32, shuffle=True)

In [18]:
# Let us take a look at dataloader
print("There are {} number of batches".format(len(train_loader)))

for x,y in train_loader:
    print("For one iteration (batch) there exists : \n Data : {} \n Labels : {}".format(x.shape, y.shape))
    break # This ensures loop stops after 1 round

There are 24 number of batches
For one iteration (batch) there exists : 
 Data : torch.Size([32, 7]) 
 Labels : torch.Size([32, 1])


![demo](https://user-images.githubusercontent.com/30661597/60379583-246e5e80-9a68-11e9-8b7f-a4294234c201.png)

In [19]:
# Let us build the neural network
class Model(nn.Module):
    
    def __init__(self, input_features, output_features):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(input_features, 5)
        self.fc2 = nn.Linear(5,4)
        self.fc3 = nn.Linear(4,3)
        self.fc4 = nn.Linear(3,output_features)
        self.tanh = nn.Tanh()
        self.sigmoid = nn.Sigmoid()
        
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.tanh(out)
        out = self.fc2(out)
        out = self.tanh(out)
        out = self.fc3(out)
        out = self.tanh(out)
        out = self.fc4(out)
        out = self.sigmoid(out)
        return out

$H_{p}(q)=-\frac{1}{N} \sum_{i=1}^{N} y_{i} \cdot \log \left(p\left(y_{i}\right)\right)+\left(1-y_{i}\right) \cdot \log \left(1-p\left(y_{i}\right)\right)$


cost = -(Y * torch.log(hypothesis) + (1 - Y) * torch.log(1 - hypothesis)).mean()

In [20]:
# Create the network (an object of the Net class)
model = Model(7,1)
#In Binary Cross Entropy: the input and output should have the same shape 
#size_average = True --> the losses are averaged over observations for each minibatch
criterion = torch.nn.BCELoss(size_average=True)   
# We will use SGD with momentum with a learning rate of 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)



In [21]:
# Train the network 
num_epochs = 200
for epoch in range(num_epochs):
    for inputs,labels in train_loader:
        inputs = inputs.float()
        labels = labels.float()
        # Feed Forward
        # output = model(inputs) can use this syntax in Pytorch. It'll automatically considers forward() method with 1 argument(inputs)
        output = model.forward(inputs)
        # Loss Calculation
        loss = criterion(output, labels)
        # Clear the gradient buffer (we don't want to accumulate gradients)
        optimizer.zero_grad()
        # Backpropagation 
        loss.backward()
        # Weight Update: w <-- w - lr * gradient
        optimizer.step()
        
    #Accuracy
    # Since we are using a sigmoid, we will need to perform some thresholding
    output = (output>0.5).float()
    # Accuracy: (output == labels).float().sum() / output.shape[0]
    accuracy = (output == labels).float().mean()
    # Print statistics 
    print("Epoch {}/{}, Loss: {:.3f}, Accuracy: {:.3f}".format(epoch+1,num_epochs, loss, accuracy))

Epoch 1/200, Loss: 0.697, Accuracy: 0.531
Epoch 2/200, Loss: 0.628, Accuracy: 0.594
Epoch 3/200, Loss: 0.294, Accuracy: 0.906
Epoch 4/200, Loss: 0.469, Accuracy: 0.688
Epoch 5/200, Loss: 0.490, Accuracy: 0.719
Epoch 6/200, Loss: 0.458, Accuracy: 0.750
Epoch 7/200, Loss: 0.492, Accuracy: 0.750
Epoch 8/200, Loss: 0.536, Accuracy: 0.719
Epoch 9/200, Loss: 0.446, Accuracy: 0.750
Epoch 10/200, Loss: 0.618, Accuracy: 0.594
Epoch 11/200, Loss: 0.328, Accuracy: 0.844
Epoch 12/200, Loss: 0.547, Accuracy: 0.594
Epoch 13/200, Loss: 0.582, Accuracy: 0.625
Epoch 14/200, Loss: 0.521, Accuracy: 0.688
Epoch 15/200, Loss: 0.438, Accuracy: 0.719
Epoch 16/200, Loss: 0.423, Accuracy: 0.812
Epoch 17/200, Loss: 0.508, Accuracy: 0.719
Epoch 18/200, Loss: 0.512, Accuracy: 0.719
Epoch 19/200, Loss: 0.375, Accuracy: 0.875
Epoch 20/200, Loss: 0.544, Accuracy: 0.781
Epoch 21/200, Loss: 0.513, Accuracy: 0.719
Epoch 22/200, Loss: 0.516, Accuracy: 0.719
Epoch 23/200, Loss: 0.498, Accuracy: 0.719
Epoch 24/200, Loss: 