# Heart Disease Dataset

### Read dataset

In [1]:
import pandas as pd
import numpy as np

def read_heart_disease_dataset():
    file_path = "./data/heart.csv"
    data = pd.read_csv(file_path)

    # One-hot encoding for field "sex"
    data["sex_male"] = (data["sex"] == 1).astype(int)
    data["sex_female"] = (data["sex"] == 0).astype(int)

    # One-hot encoding for field "cp" (chest pain type)
    cp_dummies = pd.get_dummies(data["cp"], prefix="cp_type")
    data = pd.concat([data, cp_dummies], axis=1)

    # One-hot encoding for field "restecg" (resting electrocardiographic results)
    restecg_dummies = pd.get_dummies(data["restecg"], prefix="restecg_type")
    data = pd.concat([data, restecg_dummies], axis=1)

    # One-hot encoding for field "slope"
    slope_dummies = pd.get_dummies(data["slope"], prefix="slope_type")
    data = pd.concat([data, slope_dummies], axis=1)

    # One-hot encoding for field "thal"
    thal_dummies = pd.get_dummies(data["thal"], prefix="thal_type")
    data = pd.concat([data, thal_dummies], axis=1)

    # Transform all fields to int
    data = data.astype(int)

    # Remove original columns
    data.drop(columns=["sex", "cp", "restecg", "slope", "thal"], inplace=True)

    data = data[[col for col in data.columns if col != 'target'] + ['target']]

    return data.to_numpy(), data.columns

array_data, columns = read_heart_disease_dataset()

print("Array Shape:", array_data[0])
print("Column Names:", columns)

Array Shape: [ 52 125 212   0 168   0   1   2   1   0   1   0   0   0   0   1   0   0
   0   1   0   0   0   1   0]
Column Names: Index(['age', 'trestbps', 'chol', 'fbs', 'thalach', 'exang', 'oldpeak', 'ca',
       'sex_male', 'sex_female', 'cp_type_0', 'cp_type_1', 'cp_type_2',
       'cp_type_3', 'restecg_type_0', 'restecg_type_1', 'restecg_type_2',
       'slope_type_0', 'slope_type_1', 'slope_type_2', 'thal_type_0',
       'thal_type_1', 'thal_type_2', 'thal_type_3', 'target'],
      dtype='object')


### Imports

In [2]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import datasets
from torchvision.transforms import ToTensor

### Create new Dataset

In [3]:
class HeartDiseaseDataset(Dataset):
    def __init__(self, data):
        # Divide el array en características y etiquetas
        self.X = torch.tensor(data[:, :-1], dtype=torch.float32)
        self.y = torch.tensor(data[:, -1], dtype=torch.long)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]


### Use Cuda

In [4]:
# Get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cuda device


### Read from CSV

In [5]:
array_data_length = len(array_data)
test_data_items_length = int(array_data_length * 0.2)
train_data_items_length = array_data_length - test_data_items_length

dataset = HeartDiseaseDataset(array_data)
train_data, test_data = random_split(dataset, [train_data_items_length, test_data_items_length])

class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(24, 300),
            nn.LeakyReLU(),
            nn.Dropout(0.3),
            nn.Linear(300, 300),
            nn.LeakyReLU(),
            nn.Dropout(0.3),
            nn.Linear(300, 64),
            nn.LeakyReLU(),
            nn.Linear(64, 2),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=24, out_features=300, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=300, out_features=300, bias=True)
    (4): LeakyReLU(negative_slope=0.01)
    (5): Dropout(p=0.3, inplace=False)
    (6): Linear(in_features=300, out_features=64, bias=True)
    (7): LeakyReLU(negative_slope=0.01)
    (8): Linear(in_features=64, out_features=2, bias=True)
  )
)


### Optimizing the Model Parameters

In [6]:
loss_fn = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)

def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

### Train

In [7]:
epochs = 100
batch_size = 64

test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)

print("Done!")

Epoch 1
-------------------------------
loss: 2.452157  [   64/  820]
Test Error: 
 Accuracy: 53.7%, Avg loss: 0.689286 

Epoch 2
-------------------------------
loss: 0.685975  [   64/  820]
Test Error: 
 Accuracy: 53.7%, Avg loss: 0.683998 

Epoch 3
-------------------------------
loss: 0.737140  [   64/  820]
Test Error: 
 Accuracy: 62.9%, Avg loss: 0.682536 

Epoch 4
-------------------------------
loss: 0.706393  [   64/  820]
Test Error: 
 Accuracy: 56.1%, Avg loss: 0.668994 

Epoch 5
-------------------------------
loss: 0.673298  [   64/  820]
Test Error: 
 Accuracy: 62.9%, Avg loss: 0.666499 

Epoch 6
-------------------------------
loss: 0.678741  [   64/  820]
Test Error: 
 Accuracy: 59.5%, Avg loss: 0.618453 

Epoch 7
-------------------------------
loss: 0.678622  [   64/  820]
Test Error: 
 Accuracy: 62.9%, Avg loss: 0.601545 

Epoch 8
-------------------------------
loss: 0.639382  [   64/  820]
Test Error: 
 Accuracy: 69.3%, Avg loss: 0.602505 

Epoch 9
----------------

### Test one

In [8]:
import random
import numpy as np

def testRandom(dataset):
    random_element = random.randint(0, len(dataset))
    X, Y = dataset[random_element]

    data = np.array([
        np.append(X.numpy(), Y.numpy())
    ])

    my_custom_dataset = HeartDiseaseDataset(data)
    dataloader = DataLoader(my_custom_dataset)

    for X, y in dataloader:
        X, y = X.to(device), y.to(device)
        pred = model(X)
        print("Pred:", pred.argmax(1).item(), "Real:", y.item())

testRandom(dataset)

Pred: 1 Real: 0


#### Save the model

In [9]:
torch.save(model.state_dict(), "model.pth")

#### Load the model

In [10]:
model = NeuralNetwork()
model.load_state_dict(torch.load("model.pth"))
model.to(device)

  model.load_state_dict(torch.load("model.pth"))


NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=24, out_features=300, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=300, out_features=300, bias=True)
    (4): LeakyReLU(negative_slope=0.01)
    (5): Dropout(p=0.3, inplace=False)
    (6): Linear(in_features=300, out_features=64, bias=True)
    (7): LeakyReLU(negative_slope=0.01)
    (8): Linear(in_features=64, out_features=2, bias=True)
  )
)