# Introduction

In this notebook, we will carry out a comprehensive analysis of the disease diabetes by comparing the data of thousands of patients.

# Import Statements

In [None]:
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset

# Read & Load the dataset using Pandas

In [None]:
# CSV export of a published Google Sheets document; pandas downloads and parses it.
DATASET_URL = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQSMaLAPwUINcpluu2_4iBWsEzrXMOvu9fqmWjLC9BxxOnxPhPXSpg9TvRCQ_IvrmJ2O3x0LI_AMNaE/pub?output=csv'
data = pd.read_csv(DATASET_URL)

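## For x: Extract all rows and all columns except the last, and convert them to a NumPy array using the .values attribute.
## For y: Extract the last column.
## The labels in y are still strings at this point, so we keep them in a plain Python list for now.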

In [None]:
# Features: every row, every column but the last, as a NumPy array.
feature_frame = data.iloc[:, :-1]
x = feature_frame.values
# Targets: the last column, kept as a plain Python list of raw label values.
label_column = data.iloc[:, -1]
y_string = list(label_column)

# Let's have a look at some samples from our data

In [None]:
# Show the first three feature rows, then the first three labels.
for preview in (x[:3], y_string[:3]):
    print(preview)

# Our neural network only understands numbers! So convert the string labels to integers

In [None]:
# Encode the labels as integers: 'positive' -> 1, any other value -> 0.
y_int = [1 if label == 'positive' else 0 for label in y_string]

# Now convert to an array

In [None]:
# Turn the integer labels into a 1-D float64 NumPy array.
y = np.asarray(y_int, dtype=np.float64)

# Feature Normalization. Standardize every feature to zero mean and unit variance so that all features share a comparable scale

In [None]:
# Learn the per-column statistics first, then apply the scaling to the same data.
# The fitted scaler stays available as `sc`.
sc = StandardScaler()
sc.fit(x)
x = sc.transform(x)

## Now we convert the arrays to PyTorch tensors
### We add an extra dimension to convert this array to 2D

In [None]:
# Copy the feature matrix into a tensor (dtype follows the NumPy array).
x = torch.tensor(x)
# Labels: insert a trailing axis so the 1-D label vector becomes a column of shape (N, 1).
y = torch.unsqueeze(torch.tensor(y), dim=1)

In [None]:
# Print the feature tensor's shape, then the label tensor's shape.
for tensor in (x, y):
    print(tensor.shape)

In [None]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset pairing each sample with its label.

    The base class is referenced by its fully qualified name because this
    class is itself called ``Dataset``. Inheriting from the bare name would
    make the class subclass its own previous definition every time this
    cell is executed again.

    Args:
        x: Indexable collection of samples (e.g. a 2-D tensor).
        y: Indexable collection of labels aligned with ``x``.
    """

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __getitem__(self, index):
        """Return the ``(sample, label)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples, i.e. the length of ``x``."""
        return len(self.x)

In [None]:
# Wrap the feature and label tensors so they can be indexed as (sample, label) pairs.
dataset = Dataset(x=x, y=y)

In [None]:
# Number of samples in the dataset; left as a bare expression so the notebook shows it as the cell output.
len(dataset)

# Load the data to your dataloader for batch processing and shuffling

In [None]:
# Serve the dataset in mini-batches of 32 samples, reshuffled on every pass.
train_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
)

# Let's have a look at the data loader

In [None]:
print("There is {} batches in the dataset".format(len(train_loader)))
# Use dedicated names for the batch: reusing `x` and `y` as loop variables
# would overwrite the full-dataset tensors with a single mini-batch.
for batch_x, batch_y in train_loader:
    print("For one iteration (batch), there is:")
    print("Data:    {}".format(batch_x.shape))
    print("Labels:  {}".format(batch_y.shape))
    # Only the first batch is needed for this preview.
    break

![demo](https://user-images.githubusercontent.com/30661597/60379583-246e5e80-9a68-11e9-8b7f-a4294234c201.png)

# Now let's build the above network

In [None]:
class Model(nn.Module):
    """Fully connected binary classifier: input -> 5 -> 4 -> 3 -> 1.

    The three hidden layers use tanh; the single output unit goes through a
    sigmoid, so every prediction lies strictly between 0 and 1.

    Args:
        input_features: Number of features in each input sample.
    """

    def __init__(self, input_features):
        super().__init__()
        # Linear layers, narrowing step by step down to one output unit.
        self.fc1 = nn.Linear(input_features, 5)
        self.fc2 = nn.Linear(5, 4)
        self.fc3 = nn.Linear(4, 3)
        self.fc4 = nn.Linear(3, 1)
        # Activations: sigmoid for the output, tanh for the hidden layers.
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Run ``x`` through the network and return the sigmoid output."""
        hidden = x
        # Each hidden layer is a linear map followed by tanh.
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.tanh(layer(hidden))
        # Output layer: linear map squashed into (0, 1).
        return self.sigmoid(self.fc4(hidden))

### Create the network (an object of the Model class)
### In Binary Cross Entropy: the input (the prediction) and the target should have the same shape 
### size_average = True (reduction = 'mean' in current PyTorch) --> the losses are averaged over observations for each minibatch

### We will use SGD with momentum with a learning rate of 0.1

In [None]:
# Size the input layer from the number of feature columns in x.
net = Model(x.shape[1])
# Binary cross-entropy averaged over each mini-batch. `reduction='mean'` is the
# current spelling of the deprecated `size_average=True` argument.
criterion = torch.nn.BCELoss(reduction='mean')
# SGD with momentum and a learning rate of 0.1.
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9)

# Train the network 

In [None]:
num_epochs = 200
for epoch in range(num_epochs):
    # Running totals so the reported statistics cover the whole epoch instead
    # of only its last mini-batch.
    total_loss = 0.0
    total_correct = 0
    total_samples = 0

    for inputs, labels in train_loader:
        inputs = inputs.float()
        labels = labels.float()
        # Feed Forward
        output = net(inputs)
        # Loss Calculation
        loss = criterion(output, labels)
        # Clear the gradient buffer (we don't want to accumulate gradients)
        optimizer.zero_grad()
        # Backpropagation
        loss.backward()
        # Weight Update: w <-- w - lr * gradient
        optimizer.step()

        # Statistics. The batch loss is weighted by the batch size so a smaller
        # final batch does not skew the epoch average (assumes `criterion`
        # returns the per-batch mean).
        batch_size = labels.shape[0]
        total_loss += loss.item() * batch_size
        # Since we are using a sigmoid, threshold at 0.5 to get class predictions
        output = (output > 0.5).float()
        total_correct += (output == labels).sum().item()
        total_samples += batch_size

    # Guard against an empty loader to avoid a division by zero.
    seen = max(total_samples, 1)
    epoch_loss = total_loss / seen
    # These predictions were made while the weights were still being updated,
    # so this is a running training accuracy, not a separate evaluation pass.
    accuracy = total_correct / seen
    # Print statistics
    print("Epoch {}/{}, Loss: {:.3f}, Accuracy: {:.3f}".format(epoch+1,num_epochs, epoch_loss, accuracy))