In [38]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as stats

In [33]:
from ucimlrepo import fetch_ucirepo 

# fetch dataset 
heart_disease = fetch_ucirepo(id=45) 

# print(f'data: {heart_disease.data}')
# print(f'metadata: {heart_disease.metadata}')
# print(f'var: {heart_disease.variables}')

In [61]:
# data (as pandas dataframes) 
X = heart_disease.data.features 
y = heart_disease.data.targets 

y = (y['num'] >  0).astype(int)
# print(y.values)
# print(y.keys())
#features
#1 age
#2 sex
#3 cp
#4 trestbps
#5 chol
#6 fbs
#7 restecg
#8 thalach
#9 exang
#10 oldpeak
#11 slope
#12 ca
#13 thal
print(f'features keys:\n{X.keys()}')
# print('###############################')
# print('###############################')
# print('###############################')
# print(f'Targets num:\n{y}')
# we need to group them "binarize" into 0 & (1,2,3)=1


features keys:
Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal'],
      dtype='object')


In [52]:
#GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [53]:
# Step 0: Normalize data
# z-score all inputs features
cols2zscore = X.keys()
# cols2zscore = cols2zscore.drop('col-name')
X[cols2zscore] = X[cols2zscore].apply(stats.zscore)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[cols2zscore] = X[cols2zscore].apply(stats.zscore)


In [54]:
# Step1: Convert numpy data into tensor data
xx = torch.tensor( X[cols2zscore].values ).float()
yy  = torch.tensor( y.values  ).float()

In [55]:
xx.shape

torch.Size([303, 13])

In [56]:
yy.shape

torch.Size([303])

In [57]:
# Step2: use scikit learn to split the data
train_data, test_data, train_labels, test_labels = train_test_split(xx, yy, test_size=0.1)

In [58]:
# Step 3: Convert into PyTorch Datasets
train_data = TensorDataset(train_data, train_labels)
test_data  = TensorDataset(test_data , test_labels)

In [59]:
train_data.tensors[0].shape

torch.Size([272, 13])

In [60]:
test_data.tensors[0].shape

torch.Size([31, 13])

In [62]:
# Step 4: Translate into dataloader objects
batchsize    = 32
train_loader = DataLoader(train_data, batch_size=batchsize, shuffle=True, drop_last=True)
test_loader  = DataLoader(test_data,  batch_size=test_data.tensors[0].shape[0])

In [67]:
# Create the Deep Learning Model
def createTheNet():
    class diseasePredictionNet(nn.Module):
        def __init__(self):
            super().__init__()
            
            self.input = nn.Linear(2,20)
            
            self.bnormHidden1 =  nn.BatchNorm1d(20)
            self.hidden1 = nn.Linear(20,10)
            
            self.bnormHidden2 =  nn.BatchNorm1d(10)
            self.hidden2 = nn.Linear(10,5)
            
            self.output = nn.Linear(5,1)
            
        def forward(self, x):
            
            x = F.leaky_relu( self.input(x) )
            
            # x = self.bnormHidden1(x)
            x = F.leaky_relu( self.hidden1(x) )

            # x = self.bnormHidden2(x)
            x = F.leaky_relu( self.hidden2(x) )
                        
            x = self.output(x)
            
            return x
        
    diseasePredictionModel = diseasePredictionNet().to(device)
    
    lossfun = nn.BCEWithLogitsLoss()
    
    optimizer = torch.optim.Adam(diseasePredictionModel.parameters(), lr=0.01)#, betas=(0.9, 0.999), eps=1e-8)
    
    return diseasePredictionModel, lossfun, optimizer

In [68]:
# test the model that is has NO ERRORS!

net2, lossfun, optimizer = createTheNet()

input = torch.rand(1,2).to(device)
net2.eval()
net2(input)
print(input)
net2.train()

tensor([[0.1846, 0.1161]], device='cuda:0')


diseasePredictionNet(
  (input): Linear(in_features=2, out_features=20, bias=True)
  (bnormHidden1): BatchNorm1d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (hidden1): Linear(in_features=20, out_features=10, bias=True)
  (bnormHidden2): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (hidden2): Linear(in_features=10, out_features=5, bias=True)
  (output): Linear(in_features=5, out_features=1, bias=True)
)

In [70]:
# Train the Model

def trainTheModel(diseasePredictionTrain, lossfun, optimizer):
    
    #number of epochs to train
    numepochs = 50
        
    #initialize losses & accuracy
    losses   = torch.zeros(numepochs)
    trainAcc = []
    testAcc  = []
    
    for epochi in range(numepochs):
        
        #batch loss & accuracy
        batchLoss = []
        batchAcc  = []
        
        #loop over mini-batches
        for X,y in train_loader:
            
            # push data to GPU
            X = X.to(device)
            y = y.to(device)
            
            #Forward pass & loss
            yHat = diseasePredictionTrain(X)
            loss = lossfun(yHat, y)
            
            #backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            #loss from this batch
            batchLoss.append(loss.item())
            
            # accuracy from this batch for categorical data
            # batchMathces = (torch.abs(yHat - y)).detach().cpu().float().numpy()
            # batchAcc.append(( batchMathces < 1))
            
            #accuracy from this batch -> for BCE
            matches = torch.argmax(yHat,axis=1) == y ##
            matchesNumeric = matches.float()
            batchAcc.append( 100*torch.mean(matchesNumeric).item() )
        
        #average accuracy across mini-batches
        trainAcc.append(100 * np.mean((batchAcc)))
        
        #average losses across all mini-batches
        losses[epochi] = np.mean(batchLoss)
        
        ################################
        
        #final forward pass for Test Accuracy
        X,y = next(iter(test_loader))
        
        # push data to GPU
        X = X.to(device)
        y = y.to(device)

        with torch.no_grad():
            yHat = diseasePredictionTrain(X)
            
        #compute the test accuracy for categorical data
        # testMatches = (torch.abs(yHat - y)).detach().cpu().float().numpy()
        # testMatchesNumeric = (testMatches < 1)
        # testAcc.append(100 * np.mean( testMatchesNumeric ) )

        #compute the test accuracy for BCE
        testMatches = torch.argmax(yHat, axis=1) == y
        testMatchesNumeric = testMatches.float()
        testAcc.append(100 * np.mean( testMatchesNumeric ).item() )
    
    return trainAcc, testAcc, losses, diseasePredictionTrain

In [71]:
# Test the training code that it has NO ERRORS
diseaseModel, lossfun, optimizer = createTheNet()
trainAcc, testAcc, losses, diseasePredictionTrained = trainTheModel(diseaseModel, lossfun, optimizer)

NameError: name 'createTheNet' is not defined

In [None]:
fig,ax = plt.subplots(1,2,figsize=(10,5))

ax[0].plot(losses)
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('Loss')
ax[0].set_title('Model Loss')

ax[1].plot(trainAcc, label='Train')
ax[1].plot(testAcc, label='Test')
ax[1].legend()
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Accuracy')
ax[1].set_title(f'Test accuracy {testAcc[-1]:.2f}%')
ax[1].set_ylim([0,110])

plt.show()