### Feed Forward Neural Network for Diabetes Prediction

In [1]:
import numpy as np
import torch

**Import Dataset diabetes.csv from Data/diabetes.csv**

In [3]:
!pip install pandas

Collecting pandas
  Downloading pandas-1.1.5-cp36-cp36m-win_amd64.whl (8.7 MB)
Collecting pytz>=2017.2
  Downloading pytz-2021.1-py2.py3-none-any.whl (510 kB)
Installing collected packages: pytz, pandas
Successfully installed pandas-1.1.5 pytz-2021.1


In [12]:
import pandas as pd
df = pd.read_csv('Data/diabetes.csv')

In [13]:
df.head()

Unnamed: 0,Number of times pregnant,Plasma glucose concentration,Diastolic blood pressure,Triceps skin fold thickness,2-Hour serum insulin,Body mass index,Age,Class
0,6,148,72,35,0,33.6,50,positive
1,1,85,66,29,0,26.6,31,negative
2,8,183,64,0,0,23.3,32,positive
3,1,89,66,23,94,28.1,21,negative
4,0,137,40,35,168,43.1,33,positive


### Data preprocess

In [19]:
features = df.iloc[:,:-1]
labels = df.iloc[:,-1]

In [28]:
features_numpy = features.values
labels_list = list(labels)

In [29]:
def convert_str_int(labels):
    labels_int = []
    for i in labels:
        if i=='positive':
            labels_int.append(1)
        else:
            labels_int.append(0)
    return labels_int

In [30]:
labels_numpy = np.array(convert_str_int(labels_list)) 

### Data Normalization

In [37]:
features.describe()

Unnamed: 0,Number of times pregnant,Plasma glucose concentration,Diastolic blood pressure,Triceps skin fold thickness,2-Hour serum insulin,Body mass index,Age
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,33.240885
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,11.760232
min,0.0,0.0,0.0,0.0,0.0,0.0,21.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,24.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,29.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,41.0
max,17.0,199.0,122.0,99.0,846.0,67.1,81.0


* Normalize these features to -1 to 1

In [40]:
from sklearn.preprocessing import StandardScaler

In [42]:
sc = StandardScaler()
features_numpy = sc.fit_transform(features_numpy)

##### Convert to pytorch tensors

In [61]:
X = torch.tensor(features_numpy)
Y = torch.tensor(labels_numpy).unsqueeze(1)

In [64]:
print(X.shape)
print(Y.shape)

torch.Size([768, 7])
torch.Size([768, 1])


#### Create and load Dataset

In [86]:
from torch.utils.data import Dataset
class Datasets(Dataset):
    def __init__(self, x,y):
        self.x = x
        self.y = y
    
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return len(self.x)
    

In [87]:
dataset = Datasets(X,Y)

In [88]:
len(dataset)

768

In [116]:
# Load the data to DataLoader for batch processing and shuffling
train_loader = torch.utils.data.DataLoader(dataset = dataset, 
                            batch_size =32, 
                            shuffle = True)

In [117]:
print("There are {} batches in dataset".format(len(train_loader)))

for (x,y) in train_loader:
    print("for one iteration (batch): ")
    print("Data :     {}".format(x.shape))
    print("Label :     {}".format(y.shape))
    break

There are 24 batches in dataset
for one iteration (batch): 
Data :     torch.Size([32, 7])
Label :     torch.Size([32, 1])


### Model Architecture
![alt text](Data/architecture.png "Model")

In [118]:
class MyModel(torch.nn.Module):
    
    def __init__(self, input_features, output_features):
        super(MyModel,self).__init__()
        self.fc1 = torch.nn.Linear(input_features,7)
        self.fc2 = torch.nn.Linear(7,4)
        self.fc3 = torch.nn.Linear(4,2)
        self.fc4 = torch.nn.Linear(2,output_features)

        #ouput activation : sigmoid
        #hidden activation : tanh

        self.sigmoid = torch.nn.Sigmoid()
        self.tanh = torch.nn.Tanh()
    
    def forward(self,x):
        out = self.fc1(x)
        out = self.tanh(out)
        out = self.fc2(out)
        out = self.tanh(out)
        out = self.fc3(out)
        out = self.tanh(out)
        out = self.fc4(out)
        out = self.sigmoid(out)
        
        return out

#### Binary Cross Entropy Loss
L = -1/N * [(Summation) i=1 to N ] (yi.log(p(yi)) + (1-yi).log(p(yi))

In [119]:
input_features = X.shape[1]
output_features = Y.shape[1]
net = MyModel(input_features,output_features)

In [120]:
criterion = torch.nn.BCELoss(size_average = True)
optimizer = torch.optim.SGD(net.parameters(),lr = 0.11, momentum = 0.9)

#### Training Network

In [121]:
epochs = 200
for epoch in range(epochs):
    
    net.train()
    # this happens for each epoch / #'s batch
    for inputs, labels in train_loader:
        inputs = inputs.float()
        labels = labels.float()
        
        #Forward Propagation
        outputs = net.forward(inputs)
        
        #Loss calculation
        loss = criterion(outputs, labels)
        
        # clear the gradient buffer only for pytorch(w <-- w- lr*gradient)
        optimizer.zero_grad()
        
        #calculate gradients/ backprop
        loss.backward()
        
        #update weights
        optimizer.step()
            
            
    #Accuracy
    output = (outputs>0.5).float()
    accuracy = (output == labels).float().mean()
    
    
    #print stats
    print("Epoch {}/{}, Loss: {:.4f}, Accuracy: {:.3f}".format(epoch+1,epochs,loss,accuracy))

Epoch 1/200, Loss: 0.6859, Accuracy: 0.594
Epoch 2/200, Loss: 0.5604, Accuracy: 0.750
Epoch 3/200, Loss: 0.6999, Accuracy: 0.656
Epoch 4/200, Loss: 0.5190, Accuracy: 0.719
Epoch 5/200, Loss: 0.6488, Accuracy: 0.719
Epoch 6/200, Loss: 0.4035, Accuracy: 0.812
Epoch 7/200, Loss: 0.3640, Accuracy: 0.844
Epoch 8/200, Loss: 0.5659, Accuracy: 0.719
Epoch 9/200, Loss: 0.5494, Accuracy: 0.750
Epoch 10/200, Loss: 0.6019, Accuracy: 0.625
Epoch 11/200, Loss: 0.5669, Accuracy: 0.688
Epoch 12/200, Loss: 0.4424, Accuracy: 0.750
Epoch 13/200, Loss: 0.3632, Accuracy: 0.844
Epoch 14/200, Loss: 0.4967, Accuracy: 0.719
Epoch 15/200, Loss: 0.4478, Accuracy: 0.812
Epoch 16/200, Loss: 0.5707, Accuracy: 0.719
Epoch 17/200, Loss: 0.4779, Accuracy: 0.750
Epoch 18/200, Loss: 0.4611, Accuracy: 0.781
Epoch 19/200, Loss: 0.4871, Accuracy: 0.781
Epoch 20/200, Loss: 0.4959, Accuracy: 0.719
Epoch 21/200, Loss: 0.4879, Accuracy: 0.781
Epoch 22/200, Loss: 0.5221, Accuracy: 0.719
Epoch 23/200, Loss: 0.4579, Accuracy: 0.7

Epoch 190/200, Loss: 0.3754, Accuracy: 0.844
Epoch 191/200, Loss: 0.3410, Accuracy: 0.875
Epoch 192/200, Loss: 0.2129, Accuracy: 0.938
Epoch 193/200, Loss: 0.4352, Accuracy: 0.812
Epoch 194/200, Loss: 0.5352, Accuracy: 0.688
Epoch 195/200, Loss: 0.5148, Accuracy: 0.781
Epoch 196/200, Loss: 0.4351, Accuracy: 0.781
Epoch 197/200, Loss: 0.3504, Accuracy: 0.844
Epoch 198/200, Loss: 0.5347, Accuracy: 0.781
Epoch 199/200, Loss: 0.2262, Accuracy: 0.906
Epoch 200/200, Loss: 0.2630, Accuracy: 0.906
