In [1]:
#  Import necessary libraries
import numpy as np
import pandas as pd
from torch.utils.data.dataset import Dataset
import torch
import torch.nn as nn
from torch import optim

In [2]:
# Pima dataset
label_idx = {'0': 0, '1': 1}
class PimaDataset(Dataset):

    def __init__(self, data):
        self.data = data
           
    def __getitem__(self, index):
        item = self.data.iloc[index].values    
        return (item[0:8].astype(np.float32), item[8].astype(np.int))

    def __len__(self):
        return self.data.shape[0]


    def get_datasets(pima_file, train_ratio=0.60,valid_ratio=0.20,test_ratio=0.20):

        labels = {'class': label_idx}
        df = pd.read_csv(pima_file)
        data=(df-df.mean())/df.std()
        print(data.head())
        data.replace(labels, inplace=True)

        train_df = data.sample(frac=train_ratio, random_state=10)
        validation_df = data.sample(frac=valid_ratio, random_state=10)
        test_df = data.sample(frac=test_ratio, random_state=10)

        return PimaDataset(train_df), PimaDataset(validation_df),PimaDataset(test_df)
    


In [3]:
pima_file = "diabetes.csv"

In [4]:
# Get train, validation and test data
train_ds,validation_ds,test_ds = PimaDataset.get_datasets(pima_file)

   Pregnancies   Glucose  BloodPressure  SkinThickness   Insulin       BMI  \
0     0.639530  0.847771       0.149543       0.906679 -0.692439  0.203880   
1    -0.844335 -1.122665      -0.160441       0.530556 -0.692439 -0.683976   
2     1.233077  1.942458      -0.263769      -1.287373 -0.692439 -1.102537   
3    -0.844335 -0.997558      -0.160441       0.154433  0.123221 -0.493721   
4    -1.141108  0.503727      -1.503707       0.906679  0.765337  1.408828   

   DiabetesPedigreeFunction       Age   Outcome  
0                  0.468187  1.425067  1.365006  
1                 -0.364823 -0.190548 -0.731643  
2                  0.604004 -0.105515  1.365006  
3                 -0.920163 -1.040871 -0.731643  
4                  5.481337 -0.020483  1.365006  


In [5]:
print('# instances in training set: ', len(train_ds))
print('# instances in validation set: ', len(validation_ds))
print('# instances in test set: ', len(test_ds))

# instances in training set:  461
# instances in validation set:  154
# instances in test set:  154


In [6]:
# load dataloader
batch_size = 50
train_loader = torch.utils.data.DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)
validation_loader = torch.utils.data.DataLoader(dataset=validation_ds, batch_size=batch_size, shuffle=True)
test_loader  = torch.utils.data.DataLoader(dataset=test_ds, batch_size=batch_size, shuffle=True)

In [7]:
# create net
import torch.nn.functional as F
class PimaNet(nn.Module):
    
    def __init__(self, input_size, hidden1_size,hidden2_size,num_classes):
        
        super(PimaNet, self).__init__()

        self.fc1 = nn.Linear(input_size, hidden1_size)
        #self.bn_layer = nn.BatchNorm1d(hidden1_size)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden1_size, hidden2_size)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden2_size, num_classes)   
       
    def forward(self, x):
        
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = F.log_softmax(self.fc3(x), dim=1)
        return x
    

In [8]:
net = PimaNet(8, 200,100, 2)
print(net)

PimaNet(
  (fc1): Linear(in_features=8, out_features=200, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=200, out_features=100, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=100, out_features=2, bias=True)
)


In [9]:
# set loss,learning rate and optimizer
criterion = nn.CrossEntropyLoss()
learning_rate = 0.001
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

In [10]:
# train the data and check for validation
num_epochs = 50

train_losses, valid_losses = [], []
for epoch in range(num_epochs):
    running_loss = 0
    for i, (items, classes) in enumerate(train_loader):
    
        net.train()           

        optimizer.zero_grad() 
        outputs = net(items)  
        loss = criterion(outputs, classes) 
        loss.backward()       
        optimizer.step()      

        running_loss += loss.item()

    else:
        net.eval()
        valid_loss = 0
        accuracy = 0
        with torch.no_grad():
            for i, (items, classes) in enumerate(validation_loader):
                outputs = net(items)
                loss = criterion(outputs,classes)
                valid_loss += loss.item()
                ps = torch.exp(outputs)
                
                top_p, top_class = ps.topk(1, dim=1)
                labels = classes.view(*top_class.shape)
                
                equals = top_class == labels
                accuracy += torch.mean(equals.type(torch.FloatTensor))
                
                
        train_losses.append(running_loss/len(train_loader))
        valid_losses.append(valid_loss/len(validation_loader))
        print(f'Epoch: {epoch+1}/{num_epochs},Training loss:{sum(train_losses)/len(train_losses)},Validation loss: {sum(valid_losses)/len(valid_losses)},validation Accuracy: {accuracy/len(validation_loader)*100}%')


Epoch: 1/50,Training loss:0.633804440498352,Validation loss: 0.7204402387142181,validation Accuracy: 62.25%
Epoch: 2/50,Training loss:0.5712051048874855,Validation loss: 0.5960188806056976,validation Accuracy: 78.25%
Epoch: 3/50,Training loss:0.5382442851861319,Validation loss: 0.5320881567895412,validation Accuracy: 83.0%
Epoch: 4/50,Training loss:0.5209242649376393,Validation loss: 0.5038420176133513,validation Accuracy: 84.0%
Epoch: 5/50,Training loss:0.5039063984155655,Validation loss: 0.5052298314869403,validation Accuracy: 79.25%
Epoch: 6/50,Training loss:0.495514190196991,Validation loss: 0.4919347061465184,validation Accuracy: 80.25%
Epoch: 7/50,Training loss:0.4887523634093149,Validation loss: 0.4857757054269314,validation Accuracy: 74.5%
Epoch: 8/50,Training loss:0.48114571832120423,Validation loss: 0.5003880797885358,validation Accuracy: 73.99999237060547%
Epoch: 9/50,Training loss:0.4743063373698129,Validation loss: 0.5045739457839065,validation Accuracy: 74.0%
Epoch: 10/50

In [11]:
# Plot training and validation losses
import matplotlib.pyplot as plt
plt.plot(train_losses, label='Training loss')
plt.plot(valid_losses, label='Validation loss')
plt.legend(frameon=False)

<matplotlib.legend.Legend at 0x123c2d128>

In [28]:
# predict test data:
accuracy = 0
with torch.no_grad():    
    for i, (items, classes) in enumerate(test_loader):
        outputs = net(items)
        
        ps = torch.exp(outputs)

        top_p, top_class = ps.topk(1, dim=1)
        labels = classes.view(*top_class.shape)

        equals = top_class == labels
        accuracy += torch.mean(equals.type(torch.FloatTensor))
        
print(f'Accuracy: {accuracy/len(test_loader)*100}%')
print(f'Predicted Outcome:{top_class.view(*classes.shape)}')
print(f'Actual Outcome:{classes}')      

Accuracy: 91.5%
Predicted Outcome:tensor([1, 1, 0, 0])
Actual Outcome:tensor([1, 1, 0, 0])
