In [19]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn

In [20]:
df = pd.read_csv("datasets/heart_disease_uci.csv")
df.drop(columns=["id", "dataset"], inplace=True)

In [21]:
categorial_columns = ["sex", "cp", "fbs", "restecg", "exang", "slope", "thal"]
cont_columns = ["age", "trestbps", "chol", "thalch", "oldpeak", "ca"]

In [25]:
df[categorial_columns] = df[categorial_columns].apply(LabelEncoder().fit_transform)
df[categorial_columns] = MinMaxScaler().fit_transform(df[categorial_columns])

df[cont_columns] = MinMaxScaler().fit_transform(df[cont_columns])
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalch,exang,oldpeak,slope,ca,thal,num
0,0.714286,1.0,1.000000,0.725,0.386401,0.5,0.000000,0.633803,0.0,0.556818,0.000000,0.000000,0.000000,0
1,0.795918,1.0,0.000000,0.800,0.474295,0.0,0.000000,0.338028,0.5,0.465909,0.333333,1.000000,0.333333,2
2,0.795918,1.0,0.000000,0.600,0.379768,0.0,0.000000,0.485915,0.5,0.590909,0.333333,0.666667,0.666667,1
3,0.183673,1.0,0.666667,0.650,0.414594,0.0,0.333333,0.894366,0.0,0.693182,0.000000,0.000000,0.333333,0
4,0.265306,0.0,0.333333,0.650,0.338308,0.0,0.000000,0.788732,0.0,0.454545,0.666667,0.000000,0.333333,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
915,0.530612,0.0,0.000000,0.635,0.552239,0.5,0.666667,0.661972,0.0,0.295455,1.000000,,1.000000,1
916,0.693878,1.0,1.000000,,0.230514,0.0,0.666667,,1.0,,1.000000,,1.000000,0
917,0.551020,1.0,0.000000,0.610,0.369818,0.5,0.666667,0.281690,0.0,0.295455,1.000000,,0.000000,2
918,0.612245,1.0,0.000000,,0.638474,0.5,0.000000,,1.0,,1.000000,,1.000000,0


In [23]:
class HeartDiseaseDataset(Dataset):
    def __init__(self, df):
        print(df)
        self.y = torch.tensor(df.loc["num"].values, dtype=torch.float)
        self.x = torch.tensor(df.drop(columns="num").values, dtype=torch.float)
        print(self.x)
        print(self.y)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

    def __len__(self):
        return self.x.shape[0]

In [24]:
dataset = HeartDiseaseDataset(df)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
len(dataset)

          age  sex        cp  trestbps      chol  fbs   restecg    thalch   
0    0.714286  1.0  1.000000     0.725  0.386401  0.5  0.000000  0.633803  \
1    0.795918  1.0  0.000000     0.800  0.474295  0.0  0.000000  0.338028   
2    0.795918  1.0  0.000000     0.600  0.379768  0.0  0.000000  0.485915   
3    0.183673  1.0  0.666667     0.650  0.414594  0.0  0.333333  0.894366   
4    0.265306  0.0  0.333333     0.650  0.338308  0.0  0.000000  0.788732   
..        ...  ...       ...       ...       ...  ...       ...       ...   
915  0.530612  0.0  0.000000     0.635  0.552239  0.5  0.666667  0.661972   
916  0.693878  1.0  1.000000       NaN  0.230514  0.0  0.666667       NaN   
917  0.551020  1.0  0.000000     0.610  0.369818  0.5  0.666667  0.281690   
918  0.612245  1.0  0.000000       NaN  0.638474  0.5  0.000000       NaN   
919  0.693878  1.0  0.333333     0.600  0.421227  0.0  0.000000  0.232394   

     exang   oldpeak     slope        ca      thal  num  
0      0.0  0.556

KeyError: 'num'

In [None]:
class MixedTypeClusteringModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(13, 10),
            torch.nn.ReLU(),
            torch.nn.Linear(10, 7),
            torch.nn.ReLU(),
        )
        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(7, 10),
            torch.nn.ReLU(),
            torch.nn.Linear(10, 13),
            torch.nn.Sigmoid()
        )


    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded


epochs = 10
lr = 0.001

model = MixedTypeClusteringModel()

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

for epoch in range(epochs):
    model.train()
    loss = 0

    for x, _ in dataloader:
        optimizer.zero_grad()
        outputs = model(x)
        train_loss = criterion(outputs,  x)
        train_loss.backward()
        optimizer.step()
        loss += train_loss.item()

    loss = loss / len(dataloader)
    print("epoch : {}/{}, loss = {:.6f}".format(epoch + 1, epochs, loss))