In [None]:
import sys
import numpy as np
import pandas as pd
from sklearn.metrics import precision_score, recall_score

# append the filepath to where torch is installed
sys.path.append('/home/username/.local/lib/python3.10/site-packages')
# sys.path.append('/home/millerm/.local/lib/python3.10/site-packages')

import torch
import torch.nn as nn
import torch.optim as optim

## Load and Preprocess Data

In [None]:
# Read the training split. The CSV has no header row, so pandas labels the
# columns with integers and column 187 can be addressed directly.
file_name = "ml4h_data/project2/project2_TS_input/ptbdb_train.csv"
df_train = pd.read_csv(file_name, header=None)

# Every column except 187 is an input feature; reshape to (samples, 1, 187).
x_train = df_train.iloc[:, df_train.columns != 187].to_numpy().reshape(-1, 1, 187)

# Column 187 holds the target values.
train_target = df_train.iloc[:, 187].to_numpy()
train_target

array([1., 1., 0., ..., 1., 1., 1.])

In [None]:
# Read the held-out test split. The CSV has no header row, so pandas labels
# the columns with integers and column 187 can be addressed directly.
file_name = "ml4h_data/project2/project2_TS_input/ptbdb_test.csv"
df_test = pd.read_csv(file_name, header=None)

# Every column except 187 is an input feature; reshape to (samples, 1, 187).
x_test = df_test.iloc[:, df_test.columns != 187].to_numpy().reshape(-1, 1, 187)

# Column 187 holds the target values.
test_target = df_test.iloc[:, 187].to_numpy()
test_target

array([0., 1., 0., ..., 1., 1., 0.])

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [None]:
from setsloaders import create_datasets, create_loaders

In [None]:
# Build the datasets from the train/test arrays and their targets using the
# project helper from `setsloaders`.
# NOTE(review): the helper presumably carves a validation split out of the
# training data using `seed` (three loaders are returned below) — confirm
# against setsloaders.create_datasets.
datasets = create_datasets(x_train, x_test, train_target, test_target, seed=123)
# Wrap the datasets in train / validation / test loaders.
# NOTE(review): `bs` is presumably the batch size — confirm in setsloaders.
trn_dl, val_dl, tst_dl = create_loaders(datasets, bs=256)

## Bidirectional LSTM

In [None]:
class BiLSTM(nn.Module):
    """Bidirectional LSTM built from two independent unidirectional LSTMs.

    One LSTM reads the sequence left-to-right, the other reads a time-reversed
    copy. The final output of each direction (i.e. the state reached after that
    direction has consumed the whole sequence) is concatenated and projected by
    a linear layer.

    Args:
        input_size: Number of features per timestep.
        hidden_size: Hidden units per direction.
        num_layers: Number of stacked LSTM layers per direction.
        output_size: Width of the final linear projection.

    Input:
        x of shape (batch, seq_len, input_size); both LSTMs use batch_first=True.

    Returns:
        Tensor of shape (batch, output_size). No activation is applied, so the
        values are raw scores (logits when trained with BCEWithLogitsLoss).
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm_forward = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.lstm_backward = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * 2, output_size)

    def forward(self, x):
        # nn.LSTM initialises (h_0, c_0) to zeros when no state is passed, so
        # explicit zero tensors are unnecessary.
        out_forward, _ = self.lstm_forward(x)
        out_backward, _ = self.lstm_backward(torch.flip(x, [1]))

        # Each direction's summary is its own LAST step: that is the state
        # obtained after reading the full sequence. Flipping the backward
        # outputs back into original time order and then indexing -1 would
        # instead pick the backward LSTM's first step, which has only seen the
        # final timestep of x. For seq_len == 1 both choices coincide.
        summary = torch.cat((out_forward[:, -1, :], out_backward[:, -1, :]), dim=1)
        return self.fc(summary)

In [None]:
# Model dimensions: 187 features per timestep, 32 hidden units per direction,
# a single LSTM layer, and one output unit.
input_size, hidden_size, num_layers, output_size = 187, 32, 1, 1

# Optimisation settings.
num_epochs, learning_rate = 30, 0.01

# Loaders consumed by the training and evaluation cells.
train_loader, test_loader = trn_dl, tst_dl

In [None]:
# Instantiate the network on the selected device, with a logits-based binary
# cross-entropy loss and an AdamW optimiser.
model = BiLSTM(input_size, hidden_size, num_layers, output_size).to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        # The loss expects float targets with the same (batch, 1) shape as the
        # model output.
        labels = labels.to(device).float().unsqueeze(1)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean batch loss for this epoch.
    print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_loader)}")

# Measure accuracy on the test loader with gradients disabled.
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # The model is trained with BCEWithLogitsLoss, so `outputs` are raw
        # logits. Convert them to probabilities before applying the 0.5
        # decision threshold; thresholding the logits at 0.5 would correspond
        # to a probability cut-off of roughly 0.62.
        # squeeze(1) drops only the output dimension, so a final batch holding
        # a single sample keeps its batch axis.
        predicted = (torch.sigmoid(outputs) > 0.5).float().squeeze(1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print('Accuracy: {:.2f}%'.format(accuracy))

Epoch 1, Loss: 0.5219498952714409
Epoch 2, Loss: 0.40700392679470343
Epoch 3, Loss: 0.3731217202616901
Epoch 4, Loss: 0.3416722031628213
Epoch 5, Loss: 0.3051563981102734
Epoch 6, Loss: 0.28021641184644
Epoch 7, Loss: 0.24803417521279034
Epoch 8, Loss: 0.22288890182971954
Epoch 9, Loss: 0.20443187472296925
Epoch 10, Loss: 0.18842180182294146
Epoch 11, Loss: 0.17188031280913005
Epoch 12, Loss: 0.15533916325103947
Epoch 13, Loss: 0.1505855810714931
Epoch 14, Loss: 0.1349990824010314
Epoch 15, Loss: 0.12511480127165958
Epoch 16, Loss: 0.12562763182128348
Epoch 17, Loss: 0.11256344925339629
Epoch 18, Loss: 0.10902542421003668
Epoch 19, Loss: 0.10024877274181784
Epoch 20, Loss: 0.09933524778703363
Epoch 21, Loss: 0.08937843934428401
Epoch 22, Loss: 0.08487815010111506
Epoch 23, Loss: 0.0864079075797302
Epoch 24, Loss: 0.07873739792806346
Epoch 25, Loss: 0.07538957657610498
Epoch 26, Loss: 0.06853888456414385
Epoch 27, Loss: 0.07616206858216262
Epoch 28, Loss: 0.0664873220389936
Epoch 29, Lo

In [None]:
# Collect test-set predictions and compute precision, recall and F1.
model.eval()
predictions = []
ground_truth = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # The model is trained with BCEWithLogitsLoss, so `outputs` are raw
        # logits; apply the sigmoid before the 0.5 probability threshold.
        # squeeze(1) keeps the batch axis even for a one-sample batch, so the
        # array passed to extend() is always 1-D (a 0-d array is not iterable).
        predicted = (torch.sigmoid(outputs) > 0.5).float().squeeze(1)
        predictions.extend(predicted.cpu().numpy())
        ground_truth.extend(labels.cpu().numpy())

predictions = np.array(predictions)
ground_truth = np.array(ground_truth)

precision = precision_score(ground_truth, predictions, average='binary')
recall = recall_score(ground_truth, predictions, average='binary')
# F1 is the harmonic mean of precision and recall; define it as 0 when both
# are 0 instead of dividing by zero.
f1 = (2*precision*recall)/(precision+recall) if (precision + recall) > 0 else 0.0

print('Precision: {:.2f}'.format(precision))
print('Recall: {:.2f}'.format(recall))
print('F1-Score: {:.2f}'.format(f1))

Precision: 0.98
Recall: 0.97
F1-Score: 0.97


In [None]:
# Persist the whole trained module object (not just its state_dict) to disk.
torch.save(obj=model, f="models/bilstm.pth")