In [58]:
import torch
import pandas as pd
import matplotlib.pyplot as plt

In [59]:
# Load the heart-disease records from 'heart.csv' (path is relative to the working directory).
heart_dataset = pd.read_csv('heart.csv')

In [60]:
# Preview the first rows of the loaded frame.
heart_dataset.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [61]:
# Column labels as a plain Python list; the dtype scan in a later cell iterates over it.
df_columns = heart_dataset.columns.tolist()
df_columns

['Age',
 'Sex',
 'ChestPainType',
 'RestingBP',
 'Cholesterol',
 'FastingBS',
 'RestingECG',
 'MaxHR',
 'ExerciseAngina',
 'Oldpeak',
 'ST_Slope',
 'HeartDisease']

In [62]:
# Print per-column dtypes and non-null counts.
heart_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Age             918 non-null    int64  
 1   Sex             918 non-null    object 
 2   ChestPainType   918 non-null    object 
 3   RestingBP       918 non-null    int64  
 4   Cholesterol     918 non-null    int64  
 5   FastingBS       918 non-null    int64  
 6   RestingECG      918 non-null    object 
 7   MaxHR           918 non-null    int64  
 8   ExerciseAngina  918 non-null    object 
 9   Oldpeak         918 non-null    float64
 10  ST_Slope        918 non-null    object 
 11  HeartDisease    918 non-null    int64  
dtypes: float64(1), int64(6), object(5)
memory usage: 86.2+ KB


In [63]:
# Collect the columns that must be integer-encoded before tensor conversion.
# The test is "not numeric" rather than `dtype == 'object'` so that text
# columns are still found when pandas stores them with a dedicated string
# dtype instead of object.
object_columns = [
    column
    for column in df_columns
    if not pd.api.types.is_numeric_dtype(heart_dataset[column])
]
for column in object_columns:
    print(column)

Sex
ChestPainType
RestingECG
ExerciseAngina
ST_Slope


In [64]:
# Integer-encode every categorical column in place. Codes are assigned in the
# order in which each category first appears in its column.
for column in object_columns:
    unique_values = heart_dataset[column].unique()
    mapping = dict(zip(unique_values, range(len(unique_values))))
    heart_dataset[column] = heart_dataset[column].map(mapping)


In [65]:
# Display the frame to check the result of the categorical encoding.
heart_dataset

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,0,0,140,289,0,0,172,0,0.0,0,0
1,49,1,1,160,180,0,0,156,0,1.0,1,1
2,37,0,0,130,283,0,1,98,0,0.0,0,0
3,48,1,2,138,214,0,0,108,1,1.5,1,1
4,54,0,1,150,195,0,0,122,0,0.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,0,3,110,264,0,0,132,0,1.2,1,1
914,68,0,2,144,193,1,0,141,0,3.4,1,1
915,57,0,2,130,131,0,0,115,1,1.2,1,1
916,57,1,0,130,236,0,2,174,0,0.0,1,1


In [66]:
from torch.utils.data import TensorDataset

# Feature matrix (every column except the target) and target vector as NumPy arrays.
X = heart_dataset.drop(columns=['HeartDisease']).to_numpy()
y = heart_dataset['HeartDisease'].to_numpy()

print(X.shape)
print(y.shape)

# Convert both arrays to float32 tensors and pair them sample-by-sample.
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32)

heart_dataset_tensor = TensorDataset(X, y)

(918, 11)
(918,)


In [67]:
#defining train, validation and test sets

from torch.utils.data import random_split

# 80% train and 10% validation; the test set takes whatever remains so the
# three sizes always sum to the dataset length despite int() truncation.
train_size = int(0.8 * len(heart_dataset_tensor))
val_size = int(0.1 * len(heart_dataset_tensor))
test_size = len(heart_dataset_tensor) - train_size - val_size

# A seeded generator makes the split identical on every run, so losses and
# saved checkpoints stay comparable across kernel restarts.
split_generator = torch.Generator().manual_seed(42)
train_dataset, validation_dataset, test_dataset = random_split(
    heart_dataset_tensor,
    [train_size, val_size, test_size],
    generator=split_generator,
)




In [68]:
#split dataset into batches
from torch.utils.data import DataLoader

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=32)
# The held-out split gets its own name. Binding it to `train_loader` would
# replace the training loader and make the model train on the test data.
test_loader = DataLoader(test_dataset, batch_size=32)

In [69]:
import torch.nn as nn

class Wide(nn.Module):
    """Feed-forward network with one hidden layer and a single raw-logit output.

    No sigmoid is applied in ``forward``; the returned values are the output
    layer's raw activations.

    Args:
        in_features: Number of input features per sample (default 11).
        hidden_features: Width of the hidden layer (default 33).
    """

    def __init__(self, in_features=11, hidden_features=33):
        super().__init__()
        self.hidden = nn.Linear(in_features, hidden_features)
        self.relu = nn.ReLU()
        self.output = nn.Linear(hidden_features, 1)

    def forward(self, x):
        """Map a ``(batch, in_features)`` tensor to a ``(batch, 1)`` tensor of logits."""
        x = self.relu(self.hidden(x))
        return self.output(x)

In [70]:
model = Wide()
# Total number of scalar parameters (weights and biases) in the network.
print(sum(param.numel() for param in model.parameters()))

430


In [71]:
# Loss function: binary cross-entropy computed on raw logits.
# The model's forward pass returns the output layer's values without a sigmoid,
# which is the input form BCEWithLogitsLoss expects.

loss_fn = nn.BCEWithLogitsLoss()

# Usage: loss_fn(outputs, labels)

In [72]:
# Optimizer: stochastic gradient descent with momentum.
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [None]:
#training loop function

def train_one_epoch(epoch_index, tb_writer):
    """Run one full pass over ``train_loader`` and return the mean batch loss.

    Uses the module-level ``model``, ``optimizer``, ``loss_fn`` and
    ``train_loader``.

    Args:
        epoch_index: Zero-based epoch number; used only to compute the global
            step of the periodic TensorBoard points.
        tb_writer: Object exposing ``add_scalar(tag, value, step)``.

    Returns:
        float: Mean training loss over every batch of the epoch, or 0.0 when
        the loader yields no batches.
    """
    report_every = 50
    running_loss = 0.0   # loss accumulated since the last periodic report
    epoch_loss = 0.0     # loss accumulated over the whole epoch
    num_batches = 0

    for i, data in enumerate(train_loader):
        inputs, labels = data

        # Clear gradients left over from the previous batch.
        optimizer.zero_grad()

        # Forward pass.
        outputs = model(inputs)

        # Labels are reshaped to (batch, 1) to match the model output.
        loss = loss_fn(outputs, labels.unsqueeze(1).float())
        loss.backward()

        # Adjust the learning weights.
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        epoch_loss += batch_loss
        num_batches += 1

        # Periodic progress report, averaged over the last `report_every` batches.
        if i % report_every == report_every - 1:
            last_loss = running_loss / report_every
            print('  batch{} loss: {}'.format(i+1, last_loss))
            tb_x = int(epoch_index) * len(train_loader) + i + 1
            tb_writer.add_scalar('Loss/train', last_loss, tb_x)
            running_loss = 0.0

    # Return only after every batch has been processed. Returning the epoch
    # mean (rather than the last reported window) keeps the value meaningful
    # when an epoch has fewer than `report_every` batches.
    if num_batches == 0:
        return 0.0
    return epoch_loss / num_batches


In [74]:
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter('runs/heart_disease_classfication_trainer')

#training loop
# Kept as an int: it is formatted into the progress line and the checkpoint
# file name, where a float would render as "1.0" / "model_0.0".
epoch_number = 0
EPOCHS = 5

best_vloss = 1_000_000.

for epoch in range(EPOCHS):
    print('Epoch{}:'.format(epoch_number + 1) )

    # Training mode for the optimisation pass.
    model.train(True)
    avg_loss = train_one_epoch(epoch_number,writer)

    running_vloss = 0.
    num_val_batches = 0
    # Evaluation mode for the validation pass.
    model.eval()

    #disable gradient computation to save memory:
    with torch.no_grad():
        for vinputs, vlabels in validation_loader:
            voutputs = model(vinputs)
            vloss = loss_fn(voutputs, vlabels.unsqueeze(1).float())
            # Accumulate a plain float rather than a tensor.
            running_vloss += vloss.item()
            num_val_batches += 1
    # Divide by the counted batches; max() guards against an empty validation loader.
    avg_vloss = running_vloss / max(num_val_batches, 1)
    print('Loss train{} valid {}'.format(avg_loss,avg_vloss))

    writer.add_scalars('Training vs. Validation Loss',
                       {'Training': avg_loss, 'Validation':avg_vloss},
                       epoch_number+1)
    writer.flush()

    # Checkpoint only when the validation loss improves on the best value so far.
    if avg_vloss < best_vloss:
        best_vloss = avg_vloss
        model_path = 'model_{}'.format(epoch_number)
        torch.save(model.state_dict(),model_path)

    epoch_number += 1

Epoch1.0:
tensor([[5.3000e+01, 1.0000e+00, 2.0000e+00, 1.3800e+02, 2.3400e+02, 0.0000e+00,
         2.0000e+00, 1.6000e+02, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [3.7000e+01, 0.0000e+00, 1.0000e+00, 1.3000e+02, 1.9400e+02, 0.0000e+00,
         0.0000e+00, 1.5000e+02, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [5.9000e+01, 0.0000e+00, 2.0000e+00, 1.2000e+02, 0.0000e+00, 0.0000e+00,
         0.0000e+00, 1.1500e+02, 0.0000e+00, 0.0000e+00, 1.0000e+00],
        [6.1000e+01, 0.0000e+00, 2.0000e+00, 1.9000e+02, 2.8700e+02, 1.0000e+00,
         2.0000e+00, 1.5000e+02, 1.0000e+00, 2.0000e+00, 2.0000e+00],
        [6.1000e+01, 0.0000e+00, 2.0000e+00, 1.1000e+02, 0.0000e+00, 1.0000e+00,
         0.0000e+00, 1.0800e+02, 1.0000e+00, 2.0000e+00, 2.0000e+00],
        [7.2000e+01, 0.0000e+00, 2.0000e+00, 1.4300e+02, 2.1100e+02, 0.0000e+00,
         0.0000e+00, 1.0900e+02, 1.0000e+00, 1.4000e+00, 1.0000e+00],
        [4.8000e+01, 1.0000e+00, 2.0000e+00, 1.3800e+02, 2.1400e+02, 0.0000e+00,
   