# Heart Disease Dataset

### Read dataset

In [35]:
import pandas as pd
import numpy as np

def read_heart_disease_dataset():
    file_path = "./data/heart.csv"
    data = pd.read_csv(file_path)

    # One-hot encoding for field "sex"
    data["sex_male"] = (data["sex"] == 1).astype(int)
    data["sex_female"] = (data["sex"] == 0).astype(int)

    # One-hot encoding for field "cp" (chest pain type)
    cp_dummies = pd.get_dummies(data["cp"], prefix="cp_type")
    data = pd.concat([data, cp_dummies], axis=1)

    # One-hot encoding for field "restecg" (resting electrocardiographic results)
    restecg_dummies = pd.get_dummies(data["restecg"], prefix="restecg_type")
    data = pd.concat([data, restecg_dummies], axis=1)

    # One-hot encoding for field "slope"
    slope_dummies = pd.get_dummies(data["slope"], prefix="slope_type")
    data = pd.concat([data, slope_dummies], axis=1)

    # One-hot encoding for field "thal"
    thal_dummies = pd.get_dummies(data["thal"], prefix="thal_type")
    data = pd.concat([data, thal_dummies], axis=1)

    # Transform all fields to int
    data = data.astype(int)

    # Remove original columns
    data.drop(columns=["sex", "cp", "restecg", "slope", "thal"], inplace=True)

    return data

data = read_heart_disease_dataset()

array_data = data.to_numpy()
columns = data.columns

print("Array Shape:", array_data[0])
print("Column Names:", columns)

Array Shape: [ 52 125 212   0 168   0   1   2   0   1   0   1   0   0   0   0   1   0
   0   0   1   0   0   0   1]
Column Names: Index(['age', 'trestbps', 'chol', 'fbs', 'thalach', 'exang', 'oldpeak', 'ca',
       'target', 'sex_male', 'sex_female', 'cp_type_0', 'cp_type_1',
       'cp_type_2', 'cp_type_3', 'restecg_type_0', 'restecg_type_1',
       'restecg_type_2', 'slope_type_0', 'slope_type_1', 'slope_type_2',
       'thal_type_0', 'thal_type_1', 'thal_type_2', 'thal_type_3'],
      dtype='object')


### Imports

In [36]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import datasets
from torchvision.transforms import ToTensor

### Read from CSV

In [38]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

training_data, test_data = random_split(array_data, [1200, 600])

train_dataloader = DataLoader(training_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork()

ValueError: Sum of input lengths does not equal the length of the input dataset!