# Using deep learning and PyTorch to predict whether someone has heart disease

In [1]:
# imports cell
import pandas as pd
import numpy as np
import torch
from torch import nn
import matplotlib as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

## 1. import data

In [2]:
# Mount Google Drive at /content/drive so the dataset stored there can be read below.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [8]:
# Load the heart-disease CSV from the mounted Drive into a DataFrame and display it.
data = pd.read_csv("/content/drive/MyDrive/data/heart-disease.csv")
data

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0


## 2. split data into labels (y) and features (x)

In [9]:
# Pick the compute device once: 'cuda' when a CUDA GPU is available, otherwise 'cpu'.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'
device

'cuda'

In [10]:
# Labels are the "target" column; features are every other column.
# Both are kept as plain NumPy arrays for the train/test split that follows.
y = data["target"].values
x = data.drop(columns="target").values

# Sanity check: same number of rows on both sides, and both are ndarrays.
print(len(x), len(y))
print(type(x), type(y))

303 303
<class 'numpy.ndarray'> <class 'numpy.ndarray'>


In [15]:
# split data into train and test (80% / 20%)
# A fixed random_state puts the same rows in train and test on every run,
# so the reported losses and accuracies are comparable between runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# turn it into float32 tensors on the selected device
x_train_t = torch.from_numpy(x_train).type(torch.float).to(device)
x_test_t = torch.from_numpy(x_test).type(torch.float).to(device)
# labels are float as well because the loss compares them against float logits
y_train_t = torch.from_numpy(y_train).type(torch.float).to(device)
y_test_t = torch.from_numpy(y_test).type(torch.float).to(device)
x_train_t

tensor([[37.,  0.,  2.,  ...,  2.,  0.,  2.],
        [43.,  1.,  0.,  ...,  2.,  0.,  3.],
        [56.,  1.,  2.,  ...,  1.,  1.,  1.],
        ...,
        [56.,  1.,  0.,  ...,  0.,  0.,  3.],
        [58.,  1.,  0.,  ...,  1.,  3.,  3.],
        [52.,  1.,  1.,  ...,  2.,  1.,  2.]], device='cuda:0')

## 3. create neural net class

In [12]:
# baseline model
class HeartDiseaseV0(nn.Module):
  """Baseline fully connected binary classifier.

  Five linear layers (13 -> 16 -> 32 -> 64 -> 16 -> 1). The shared
  ``self.relu`` activation is applied after every layer except the last,
  so the model returns one raw logit per input row (no sigmoid here).
  """

  def __init__(self):
    super().__init__()
    self.layer_1 = nn.Linear(13, 16)
    self.layer_2 = nn.Linear(16, 32)
    self.layer_3 = nn.Linear(32, 64)
    self.layer_4 = nn.Linear(64, 16)
    self.layer_5 = nn.Linear(16, 1)
    # NOTE: despite the attribute name, this is nn.RReLU, not nn.ReLU.
    self.relu = nn.RReLU()

  def forward(self, x):
    """Return the logits for ``x``, whose last dimension must be 13."""
    hidden = x
    for layer in (self.layer_1, self.layer_2, self.layer_3, self.layer_4):
      hidden = self.relu(layer(hidden))
    return self.layer_5(hidden)

# Instantiate the baseline model, move it to `device`, and display its layers.
model_0 = HeartDiseaseV0().to(device)
model_0

HeartDiseaseV0(
  (layer_1): Linear(in_features=13, out_features=16, bias=True)
  (layer_2): Linear(in_features=16, out_features=32, bias=True)
  (layer_3): Linear(in_features=32, out_features=64, bias=True)
  (layer_4): Linear(in_features=64, out_features=16, bias=True)
  (layer_5): Linear(in_features=16, out_features=1, bias=True)
  (relu): RReLU(lower=0.125, upper=0.3333333333333333)
)

In [18]:
# Optimizer and loss for the baseline model.
# The loss is BCEWithLogitsLoss, so it is fed the model's raw logits.
optim = torch.optim.NAdam(model_0.parameters(), lr=0.01)
loss_fn = nn.BCEWithLogitsLoss()

## 4. build a training loop

In [19]:
epochs = 10000
log_every = 1000  # evaluate on the test set and report once per this many epochs

for epoch in range(epochs):
  model_0.train()
  # forward pass
  # squeeze only the trailing size-1 dimension: (N, 1) -> (N,). A bare
  # squeeze() would also drop the batch dimension when N == 1.
  train_logits = model_0(x_train_t).squeeze(dim=1)

  # calculate the loss
  train_loss = loss_fn(train_logits, y_train_t)

  # optimizer zero grad
  optim.zero_grad()

  # loss backwards
  train_loss.backward()

  # optimizer step
  optim.step()

  # testing
  # The metrics are only reported every `log_every` epochs, so the test
  # pass (including the device-to-CPU copy for accuracy_score) is only
  # run on those epochs instead of on every one.
  if epoch % log_every == 0:
    model_0.eval()
    with torch.inference_mode():
      test_logits = model_0(x_test_t).squeeze(dim=1)

      # calculate loss and acc
      test_loss = loss_fn(test_logits, y_test_t)
      test_preds = torch.round(torch.sigmoid(test_logits)).cpu().numpy()
      # accuracy_score expects (y_true, y_pred)
      acc = accuracy_score(y_test, test_preds)

    # print out model state
    print(f'epoch: {epoch}, train_loss: {train_loss}, test_loss: {test_loss}, test_acc: {acc}')

# Every iteration switches the model back to train mode, so put it in eval
# mode once training is finished; later cells run predictions with it.
model_0.eval()

epoch: 0, train_loss: 35.03316879272461, test_loss: 29.467859268188477, test_acc: 0.7704918032786885
epoch: 1000, train_loss: 1.1278135776519775, test_loss: 1.8090189695358276, test_acc: 0.8032786885245902
epoch: 2000, train_loss: 1.2098119258880615, test_loss: 1.2862540483474731, test_acc: 0.8032786885245902
epoch: 3000, train_loss: 2.767098903656006, test_loss: 2.4577715396881104, test_acc: 0.7868852459016393
epoch: 4000, train_loss: 0.5916056632995605, test_loss: 2.0611767768859863, test_acc: 0.7377049180327869
epoch: 5000, train_loss: 0.5152229070663452, test_loss: 2.5788724422454834, test_acc: 0.639344262295082
epoch: 6000, train_loss: 0.40081268548965454, test_loss: 3.181364059448242, test_acc: 0.6885245901639344
epoch: 7000, train_loss: 0.3059134781360626, test_loss: 3.2707483768463135, test_acc: 0.7540983606557377
epoch: 8000, train_loss: 0.2761072814464569, test_loss: 3.4383533000946045, test_acc: 0.7377049180327869
epoch: 9000, train_loss: 1.054504156112671, test_loss: 1.7014

In [34]:
# Rebuild the test features as a float tensor on `device`, then show the
# types that the evaluation step below will work with.
x_test_tensor = torch.from_numpy(x_test).float().to(device)
(
  type(x_test_tensor),
  type(y_test),
  x_test_tensor.dtype,
)

(torch.Tensor, numpy.ndarray, torch.float32)

In [41]:
# Score the trained baseline on the held-out test set.
# eval() is set explicitly so the result does not depend on which mode an
# earlier cell happened to leave the model in, and inference_mode() avoids
# building an autograd graph for a pure prediction pass.
model_0.eval()
with torch.inference_mode():
  # raw logits, shape (N,); squeeze(dim=1) keeps the batch dimension even for N == 1
  y_preds = model_0(x_test_tensor).squeeze(dim=1).to('cpu')

# sigmoid -> probability, round -> 0/1 class label
accuracy_baseline_model = accuracy_score(y_test, torch.round(torch.sigmoid(y_preds)).numpy())

print(f'BaseLine model got: {round(accuracy_baseline_model*100, 2)}% accuracy score')

BaseLine model got: 78.69% accuracy score
