In [1]:
import numpy as np
import pandas as pd
import torch
from torch import nn
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torchvision as tv
import ast
from tqdm import tqdm
from sklearn.metrics import accuracy_score
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.set_default_dtype(torch.float32)

In [2]:
data_root = os.path.join("./", "data/")
y_train_raw = pd.read_csv(os.path.join(data_root, "y_train.csv"), header=None)

In [3]:
data_root = os.path.join("./", "data/")

x_train = np.load(os.path.join(data_root, "X_train.npy"))
y_train_raw = pd.read_csv(os.path.join(data_root, "y_train.csv"), header=None)

# convert strings to corresponding arrays
y_train_raw[0] = y_train_raw[0].apply(lambda x: ast.literal_eval(x))
y_train_raw = y_train_raw[0].values

x_test = np.load(os.path.join(data_root, "X_test.npy"))
y_test_raw = pd.read_csv(os.path.join(data_root, "y_test.csv"), header=None)
y_test_raw[0] = y_test_raw[0].apply(lambda x: ast.literal_eval(x))
y_test_raw = y_test_raw[0].values

class_to_index = {
    "NORM": 0,
    "MI": 1,
    "HYP": 2,
    "STTC": 3,
    "CD": 4
}

# Encoding the labels for multi-label classification
y_test = torch.zeros((len(y_test_raw), len(class_to_index)), dtype=torch.float32)
for i, classification in enumerate(y_test_raw):
    for class_name in classification:
        y_test[i, class_to_index[class_name]] = 1

y_train = torch.zeros((len(y_train_raw), len(class_to_index)), dtype=torch.float32)
for i, classification in enumerate(y_train_raw):
    for class_name in classification:
        y_train[i, class_to_index[class_name]] = 1

x_train = torch.tensor(x_train, dtype=torch.float32)
x_test = torch.tensor(x_test, dtype=torch.float32)

# Free up some memory
del y_train_raw
del y_test_raw

In [4]:
BATCH_SIZE = 128

train_set = torch.utils.data.TensorDataset(x_train, y_train)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)

test_set = torch.utils.data.TensorDataset(x_test, y_test)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=True)

In [5]:
class PositionalEncoding(nn.Module):
    def __init__(self, max_len=1000, emb_size=12):
        super(PositionalEncoding, self).__init__()

        pe = torch.zeros(max_len, emb_size)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, emb_size, 2).float() * (-np.log(10000.0) / emb_size))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer("pe", pe)

    def forward(self, x):
        return x + self.pe[:x.size(0), :]

class Transformer(nn.Transformer):
    def __init__(self, emb_size=12, nhead=6, depth=6, hidden_size=128, seq_length=1000, num_classes=5):
        super(Transformer, self).__init__(d_model=emb_size, nhead=nhead, num_encoder_layers=depth, num_decoder_layers=depth, dim_feedforward=hidden_size)
    
        self.pos_encoder = PositionalEncoding(seq_length, emb_size)
        self.decoder = nn.Linear(emb_size, 128)
        self.linear1 = nn.Linear(128, num_classes)
        
    def forward(self, x):
        #x = self.pos_encoder(x)
        x = self.encoder(x)
        x = x.mean(dim=1)
        x = self.decoder(x)
        x = torch.relu(x)
        x = self.linear1(x)
        x = torch.sigmoid(x)
        return x
    

In [6]:
def train(net, optimizer, criterion, train_loader, epochs=10, scheduler=None):
    net = net.to(device)

    train_losses = []

    for _ in range(epochs):
        pbar = tqdm(train_loader, total=len(train_loader))
        for i, (x, y) in enumerate(pbar):
            x, y = x.to(device), y.to(device)

            optimizer.zero_grad()
            y_pred = net(x)
            loss = criterion(y_pred, y)
            loss.backward()
            nn.utils.clip_grad_norm_(net.parameters(), 5)
            optimizer.step()
            
            # exact match ratio
            acc = accuracy_score(y.cpu().detach().numpy(), y_pred.cpu().detach().numpy().round())
            pbar.set_description(f"loss: {loss.item():.4f}, acc: {acc:.4f}")
            
            if scheduler is not None:
                scheduler.step(loss.item())

    return train_losses

In [10]:
net = Transformer(nhead=6, hidden_size=512, depth=3)
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
criterion = nn.BCEWithLogitsLoss()
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=20, verbose=True, cooldown=20)
train(net, optimizer, criterion, train_loader, epochs=10, scheduler=scheduler)

loss: 0.6938, acc: 0.0000: 100%|██████████| 154/154 [00:14<00:00, 10.91it/s]
loss: 0.6935, acc: 0.0391:  13%|█▎        | 20/154 [00:01<00:11, 11.30it/s]

Epoch 00172: reducing learning rate of group 0 to 1.0000e-04.


loss: 0.6935, acc: 0.0234:  21%|██        | 32/154 [00:03<00:10, 11.33it/s]

Epoch 00186: reducing learning rate of group 0 to 1.0000e-05.


loss: 0.6935, acc: 0.0312:  29%|██▊       | 44/154 [00:03<00:09, 11.27it/s]

Epoch 00197: reducing learning rate of group 0 to 1.0000e-06.


loss: 0.6935, acc: 0.0078:  35%|███▌      | 54/154 [00:04<00:08, 11.25it/s]

Epoch 00208: reducing learning rate of group 0 to 1.0000e-07.


loss: 0.6935, acc: 0.0234:  43%|████▎     | 66/154 [00:05<00:07, 11.25it/s]

Epoch 00219: reducing learning rate of group 0 to 1.0000e-08.


loss: 0.6935, acc: 0.0312:  56%|█████▋    | 87/154 [00:07<00:06, 11.11it/s]


KeyboardInterrupt: 