In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from ucimlrepo import fetch_ucirepo

### Get and process the dataset

In [None]:
# Download the UCI repository dataset with id 45 (needs network access).
heart_disease = fetch_ucirepo(id=45)

# Preview only the first rows instead of rendering the whole feature frame.
heart_disease.data.features.head()

In [None]:
data = heart_disease.data.features
diagnoses = heart_disease.data.targets

# Some feature rows contain missing values. Drop those rows from the features
# and from the targets alike so the two stay row-aligned. isna() is the
# canonical pandas missing-value test.
rows_with_nan = data.index[data.isna().any(axis=1)]
data = data.drop(index=rows_with_nan)
diagnoses = diagnoses.drop(index=rows_with_nan)

# Inline sanity check: every remaining sample still has exactly one target.
assert len(data) == len(diagnoses), 'features and targets are misaligned'

In [None]:
def _zscore(column):
    """Return `column` standardised to zero mean and unit sample std (ddof=1)."""
    # to_numeric first: guards against columns that were not parsed as numbers
    numeric = pd.to_numeric(column)
    return (numeric - numeric.mean()) / numeric.std(ddof=1)


# Standardise every column except the three listed below, which are left as-is.
cols2zscore = data.columns.drop(['sex', 'fbs', 'exang'])

for name in cols2zscore:
    data[name] = _zscore(data[name])

In [None]:
# Preview the first rows of the processed frame rather than the whole table.
data.head()

### Convert to tensor and DataLoader

In [None]:
# Build float32 tensors from the two frames.
# NOTE: `data` and `diagnoses` are rebound from DataFrames to tensors here, so
# re-run the notebook from the loading cell rather than this cell on its own.
data = torch.tensor(data.to_numpy(), dtype=torch.float32)

# Binarise the target: any value above 0 becomes 1.0, everything else 0.0.
diagnoses = (torch.tensor(diagnoses.to_numpy()) > 0).float()

In [None]:
# The shape plus a handful of rows is enough to sanity-check the conversion;
# printing the whole tensor only adds noise.
print(data.shape)
print(data[:5])

In [None]:
# Show the shape, a few labels and the class balance instead of every label.
print(diagnoses.shape)
print(diagnoses[:5])
print(f'Fraction of positive diagnoses: {diagnoses.mean().item():.3f}')

In [None]:
# split into train/test
# A fixed random_state keeps the 90/10 split, and therefore the reported test
# accuracy, identical across "Restart Kernel -> Run All".
SPLIT_SEED = 42
train_data, test_data, train_diagnoses, test_diagnoses = train_test_split(
    data, diagnoses, test_size=.1, random_state=SPLIT_SEED)

# Inline sanity check: the split must not lose or duplicate samples.
assert len(train_data) + len(test_data) == len(data)

# convert into PyTorch Datasets (rebinds train_data / test_data to datasets)
train_data = torch.utils.data.TensorDataset(train_data, train_diagnoses)
test_data = torch.utils.data.TensorDataset(test_data, test_diagnoses)

# translate into dataloader objects
batch_size = 16
# drop_last=True discards the final incomplete training batch of each epoch.
train_loader = DataLoader(train_data,
                          batch_size=batch_size,
                          shuffle=True,
                          drop_last=True)
# The whole test set is served as a single batch.
test_loader = DataLoader(test_data, batch_size=len(test_data))

### Make the net

In [None]:
class Net(nn.Module):
    """Fully connected binary classifier that emits one raw logit per sample.

    Layer widths are in_features -> 32 -> 64 -> 16 -> 1, with a ReLU after
    each of the first three layers. The output layer has no activation, so
    the result is a logit (suitable for `nn.BCEWithLogitsLoss`).

    Args:
        in_features: Number of input features per sample. Defaults to 13,
            the value that was previously hard-coded.
    """

    def __init__(self, in_features=13):
        super().__init__()
        self.input = nn.Linear(in_features, 32)
        self.fc1 = nn.Linear(32, 64)
        self.fc2 = nn.Linear(64, 16)
        self.output = nn.Linear(16, 1)

    def forward(self, x):
        """Map a (..., in_features) tensor to (..., 1) raw logits."""
        x = nn.functional.relu(self.input(x))
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        # No sigmoid here: the loss function applies it internally.
        return self.output(x)

In [None]:
class NNPipeline():
    """Bundle a `Net` with its loss, optimizer and train/test data loaders.

    The network outputs raw logits and is trained with
    `nn.BCEWithLogitsLoss`, so a sample is classified as positive when its
    logit is greater than 0 (i.e. sigmoid(logit) > 0.5). The same rule is
    used for the accuracy figures and for `predict`.

    Args:
        train_loader: Iterable of (X, y) batches used for optimisation.
        test_loader: Iterable of (X, y) batches used for evaluation only.
        learning_rate: Step size passed to the Adam optimizer.
    """

    def __init__(self, train_loader, test_loader, learning_rate=.0001):
        self._net = Net()
        self._lossfun = nn.BCEWithLogitsLoss()
        self._optimizer = torch.optim.Adam(self._net.parameters(),
                                           lr=learning_rate)
        self._train_loader = train_loader
        self._test_loader = test_loader

    @staticmethod
    def _batch_accuracy(logits, targets):
        """Return the percentage of thresholded logits equal to the targets."""
        return 100 * torch.mean(((logits > 0) == targets).float()).item()

    def _evaluate(self, loader):
        """Return accuracy (%) over every batch of `loader`, without gradients.

        Batches are weighted by their size, so the result is the accuracy over
        all samples even when the loader yields more than one batch. Returns
        NaN when the loader yields no samples.
        """
        self._net.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for X, y in loader:
                correct += ((self._net(X) > 0) == y).sum().item()
                total += y.numel()
        return 100 * correct / total if total else float('nan')

    def train(self, num_epochs=100):
        """Train the network and record per-epoch metrics.

        Args:
            num_epochs: Number of passes over the training loader.

        Returns:
            A tuple `(train_accuracy, test_accuracy, losses, net)`:
            per-epoch mean training-batch accuracy (%), per-epoch test
            accuracy (%), a tensor of per-epoch mean training loss, and the
            trained network itself.
        """
        losses = torch.zeros(num_epochs)
        train_accuracy = []
        test_accuracy = []

        for epochi in range(num_epochs):
            # _evaluate() switches to eval mode, so switch back every epoch
            self._net.train()

            batch_accuracy = []
            batch_loss = []
            for X, y in self._train_loader:
                # forward pass and loss
                y_hat = self._net(X)
                loss = self._lossfun(y_hat, y)

                # backprop
                self._optimizer.zero_grad()
                loss.backward()
                self._optimizer.step()

                # loss and accuracy for this batch
                batch_loss.append(loss.item())
                batch_accuracy.append(self._batch_accuracy(y_hat, y))

            # average losses and accuracy across the batches
            losses[epochi] = float(np.mean(batch_loss))
            train_accuracy.append(np.mean(batch_accuracy))

            # test accuracy over the full test loader (not just its first batch)
            test_accuracy.append(self._evaluate(self._test_loader))

        return train_accuracy, test_accuracy, losses, self._net

    def predict(self, loader):
        """Return 0./1. class predictions for every sample behind `loader`.

        The loader's underlying dataset is read through
        `loader.dataset.tensors[0]` and fed through the network in a single
        forward pass.

        Logits are thresholded at 0. Rounding the raw logits instead would
        produce values outside {0, 1} and would label logits in (0, 0.5] as
        negative.
        """
        self._net.eval()
        with torch.no_grad():
            y_hat = self._net(loader.dataset.tensors[0])
        return (y_hat > 0).float()

### Run the net

In [None]:
# Seed PyTorch's global RNG before the network is built so that weight
# initialisation and the shuffled batch order are reproducible across
# "Restart Kernel -> Run All".
torch.manual_seed(42)

neural_network = NNPipeline(train_loader, test_loader)
train_accuracy, test_accuracy, losses, net = neural_network.train(
    num_epochs=1000)

In [None]:
# Report the final-epoch values only; printing all 1000 per-epoch entries
# floods the output.
print(f'Final loss: {losses[-1].item():.4f}')
print(f'Final training accuracy: {train_accuracy[-1]:.2f}%')
print(f'Final testing accuracy: {test_accuracy[-1]:.2f}%')

In [None]:
# plot the results: training loss on the left, train/test accuracy on the right
fig, ax = plt.subplots(1, 2, figsize=(15, 5))

# Label the curves at plot time and let legend() pick the labels up.
# Passing a bare string to legend() is iterated character by character, so
# legend("Losses") would have labelled the curve just "L".
ax[0].plot(losses, label='Losses')
ax[0].set_ylabel('Loss')
ax[0].set_xlabel('Epochs')
ax[0].set_title('Losses')
ax[0].legend()

ax[1].plot(train_accuracy, label='Train')
ax[1].plot(test_accuracy, label='Test')
ax[1].set_title('Accuracy')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Accuracy (%)')
ax[1].legend()
ax[1].set_ylim([0, 100])

plt.show()