In [1]:
import pandas as pd
import numpy as np

from sklearn.utils import check_random_state
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

from imblearn.over_sampling import SMOTE

import matplotlib.pyplot as plt

random_seed = 42

np.random.seed(random_seed)
torch.manual_seed(random_seed)
check_random_state(random_seed)

RandomState(MT19937) at 0x7FA348CC7D40

In [2]:
# data balancing
def perf_smote(x, y, seed):
    x_reshaped = x.reshape(x.shape[0], -1)

    smote = SMOTE(random_state=seed)

    x_resampled, y_resampled = smote.fit_resample(x_reshaped, y)
    x_resampled = x_resampled.reshape(x_resampled.shape[0], *x.shape[1:])

    return x_resampled, y_resampled

In [3]:
DATA_PATH = "dataset/dickson_liver_cirrhosis.csv"
TARGET_COL = "Stage"

data_df = pd.read_csv(DATA_PATH)
data_df.head()

Unnamed: 0,N_Days,Status,Drug,Age,Sex,Ascites,Hepatomegaly,Spiders,Edema,Bilirubin,Cholesterol,Albumin,Copper,Alk_Phos,SGOT,Tryglicerides,Platelets,Prothrombin,Stage
0,2221,C,Placebo,18499,F,N,Y,N,N,0.5,149.0,4.04,227.0,598.0,52.7,57.0,256.0,9.9,1
1,1230,C,Placebo,19724,M,Y,N,Y,N,0.5,219.0,3.93,22.0,663.0,45.0,75.0,220.0,10.8,2
2,4184,C,Placebo,11839,F,N,N,N,N,0.5,320.0,3.54,51.0,1243.0,122.45,80.0,225.0,10.0,2
3,2090,D,Placebo,16467,F,N,N,N,N,0.7,255.0,3.74,23.0,1024.0,77.5,58.0,151.0,10.2,2
4,2105,D,Placebo,21699,F,N,Y,N,N,1.9,486.0,3.54,74.0,1052.0,108.5,109.0,151.0,11.5,1


In [4]:
object_cols=[col for col in data_df.columns if data_df[col].dtype=="object"]
number_of_unique={col:len(data_df[col].unique()) for col in object_cols}

print(object_cols)
print(number_of_unique)

ohe = OneHotEncoder()
ord = OrdinalEncoder()

cols_for_ordinal = ["Status", "Sex"]
cols_for_ohe = ["Drug", "Ascites", "Hepatomegaly", "Spiders", "Edema"]

ord_result  = ord.fit_transform(data_df[cols_for_ordinal])
ohe_result = ohe.fit_transform(data_df[cols_for_ohe])

columns_ohe = ohe.get_feature_names_out(cols_for_ohe)

ord_df = pd.DataFrame(ord_result, columns=cols_for_ordinal)
ohe_df = pd.DataFrame(ohe_result.toarray(), columns=columns_ohe)

data_df = pd.concat([data_df, ohe_df], axis=1).drop(columns=cols_for_ohe)
data_df[cols_for_ordinal] = ord_df

['Status', 'Drug', 'Sex', 'Ascites', 'Hepatomegaly', 'Spiders', 'Edema']
{'Status': 3, 'Drug': 2, 'Sex': 2, 'Ascites': 2, 'Hepatomegaly': 2, 'Spiders': 2, 'Edema': 3}


In [5]:
val_perct = 0.2
test_perct = 0.1

x, y = data_df.drop(TARGET_COL, axis=1).values, data_df[TARGET_COL].values
y = y - 1

scaler = MinMaxScaler()
x = scaler.fit_transform(x)

x, y = perf_smote(x, y, random_seed)

x = torch.from_numpy(x).float()
y = torch.from_numpy(y).long()

num_class = len(np.unique(y))

print(f"Number of classes: {num_class}")

train_x, val_x, train_y, val_y = train_test_split(x, y, test_size=val_perct, stratify=y, random_state=random_seed)
train_x, test_x, train_y, test_y = train_test_split(train_x, train_y, test_size=test_perct, stratify=train_y, random_state=random_seed)

Number of classes: 3


In [6]:
class TabDataset(Dataset):
    def __init__(self, data, targets):
        self.data = data
        self.targets = targets

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x = self.data[idx]
        y = self.targets[idx]
        return x, y

bs = 128

train_ds = TabDataset(train_x, train_y)
val_ds = TabDataset(val_x, val_y)
test_ds = TabDataset(test_x, test_y)

train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True, drop_last=True)
val_dl = DataLoader(val_ds, batch_size=bs, shuffle=False)
test_dl = DataLoader(test_ds, batch_size=bs, shuffle=False)

In [7]:
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import MultiheadAttention

class AttentiveMLP(nn.Module):
    def __init__(self, num_class, input_dim=24):
        super(AttentiveMLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, num_class)

        self.att1 = MultiheadAttention(embed_dim=512, num_heads=8)
        self.att2 = MultiheadAttention(embed_dim=256, num_heads=8)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x, _ = self.att1(x.unsqueeze(1), x.unsqueeze(1), x.unsqueeze(1))
        x = x.squeeze(1)
        x = F.relu(self.fc2(x))
        x, _ = self.att2(x.unsqueeze(1), x.unsqueeze(1), x.unsqueeze(1))
        x = x.squeeze(1)
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        x = F.softmax(x, dim=1)
        return x

In [8]:
input_size = 24

model = AttentiveMLP(num_class, input_size)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [9]:
num_epochs = 5
print_every = 10

valid_loss_min = np.Inf
val_loss = []
val_acc = []

train_loss = []
train_acc = []

total_step = len(train_dl)

for epoch in range(num_epochs):
    # model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    print(f'Epoch {epoch+1}\n')

    for i, (inputs, labels) in enumerate(train_dl):
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        
        pred = torch.argmax(outputs, dim=1)
        correct += torch.sum(pred == labels).item()
        total += labels.size(0)

        # if i % print_every == 0:
        #     print(
        #         f"Epoch [{epoch+1}/{num_epochs}], Batch [{i+1}], Loss: {running_loss/10:.4f}"
        #     )
        #     running_loss = 0.0

        if (i+1) % print_every == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{total_step}], Loss: {loss.item():.4f}')
    
    train_acc.append(100 * correct / total)
    train_loss.append(running_loss / total_step)
    print(f"\nTrain Accuracy: {train_acc[-1]:.2f}%")
    print(f"Train Loss: {np.mean(train_loss):.2f}\n\n")

Epoch 1

Epoch [1/5], Step [10/142], Loss: 1.1030
Epoch [1/5], Step [20/142], Loss: 1.1002
Epoch [1/5], Step [30/142], Loss: 1.0978
Epoch [1/5], Step [40/142], Loss: 1.1009
Epoch [1/5], Step [50/142], Loss: 1.0989
Epoch [1/5], Step [60/142], Loss: 1.0982
Epoch [1/5], Step [70/142], Loss: 1.0990
Epoch [1/5], Step [80/142], Loss: 1.0991
Epoch [1/5], Step [90/142], Loss: 1.0984
Epoch [1/5], Step [100/142], Loss: 1.0982
Epoch [1/5], Step [110/142], Loss: 1.1022
Epoch [1/5], Step [120/142], Loss: 1.0985
Epoch [1/5], Step [130/142], Loss: 1.0993
Epoch [1/5], Step [140/142], Loss: 1.0987

Train Accuracy: 32.60%
Train Loss: 1.10


Epoch 2

Epoch [2/5], Step [10/142], Loss: 1.0995
Epoch [2/5], Step [20/142], Loss: 1.0996
Epoch [2/5], Step [30/142], Loss: 1.0991
Epoch [2/5], Step [40/142], Loss: 1.0987
Epoch [2/5], Step [50/142], Loss: 1.0991
Epoch [2/5], Step [60/142], Loss: 1.0993
Epoch [2/5], Step [70/142], Loss: 1.0987
Epoch [2/5], Step [80/142], Loss: 1.0990
Epoch [2/5], Step [90/142], Loss

In [None]:
n_epochs = 12
print_every = 10
valid_loss_min = np.Inf
val_loss = []
val_acc = []
train_loss = []
train_acc = []
total_step = len(train_loader)
for epoch in range(1, n_epochs+1):
    running_loss = 0.0
    # scheduler.step(epoch)
    correct = 0
    total=0
    print(f'Epoch {epoch}\n')
    for batch_idx, (data_, target_) in enumerate(train_loader):
        #data_, target_ = data_.to(device), target_.to(device)# on GPU
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = model(data_)
        loss = criterion(outputs, target_)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        _,pred = torch.max(outputs, dim=1)
        correct += torch.sum(pred==target_).item()
        total += target_.size(0)
        if (batch_idx) % 20 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch, n_epochs, batch_idx, total_step, loss.item()))
    train_acc.append(100 * correct / total)
    train_loss.append(running_loss/total_step)
    print(f'\ntrain loss: {np.mean(train_loss):.4f}, train acc: {(100 * correct / total):.4f}')
    batch_loss = 0
    total_t=0
    correct_t=0
    with torch.no_grad():
        model.eval()
        for data_t, target_t in (validation_loader):
            #data_t, target_t = data_t.to(device), target_t.to(device)# on GPU
            outputs_t = model(data_t)
            loss_t = criterion(outputs_t, target_t)
            batch_loss += loss_t.item()
            _,pred_t = torch.max(outputs_t, dim=1)
            correct_t += torch.sum(pred_t==target_t).item()
            total_t += target_t.size(0)
        val_acc.append(100 * correct_t / total_t)
        val_loss.append(batch_loss/len(validation_loader))
        network_learned = batch_loss < valid_loss_min
        print(f'validation loss: {np.mean(val_loss):.4f}, validation acc: {(100 * correct_t / total_t):.4f}\n')
        # Saving the best weight 
        if network_learned:
            valid_loss_min = batch_loss
            torch.save(model.state_dict(), 'model_classification_tutorial.pt')
            print('Detected network improvement, saving current model')
    model.train()