In [1]:
import sys
import numpy as np
import pandas as pd
from sklearn.metrics import precision_score, recall_score

# append the filepath to where torch is installed
sys.path.append('/home/millerm/.local/lib/python3.10/site-packages')
# sys.path.append('/home/username/.local/lib/python3.10/site-packages')

import torch
import torch.nn as nn
import torch.optim as optim

## Load and Preprocess Data

In [2]:
file_name = "ml4h_data/project2/project2_TS_input/ptbdb_train.csv"
df_train = pd.read_csv(file_name,header=None)
x_train = df_train.iloc[:, df_train.columns != 187]
x_train = x_train.values.reshape(-1, 1, 187)
train_target = df_train.iloc[:, 187]
train_target = train_target.values
train_target

array([1., 1., 0., ..., 1., 1., 1.])

In [3]:
file_name = "ml4h_data/project2/project2_TS_input/ptbdb_test.csv"
df_test = pd.read_csv(file_name,header=None)
x_test = df_test.iloc[:, df_test.columns != 187]
x_test = x_test.values.reshape(-1, 1, 187)
test_target = df_test.iloc[:, 187]
test_target = test_target.values
test_target

array([0., 1., 0., ..., 1., 1., 0.])

In [4]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [5]:
from setsloaders import create_datasets, create_loaders

In [6]:
datasets = create_datasets(x_train, x_test, train_target, test_target, seed=123)
trn_dl, val_dl, tst_dl = create_loaders(datasets, bs=256)

## Bidirectional LSTM

In [7]:
class BiLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(BiLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm_forward = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=False)
        self.lstm_backward = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=False)
        self.fc = nn.Linear(hidden_size * 2, output_size)
        
    def forward(self, x):
        h0_forward = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0_forward = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out_forward, _ = self.lstm_forward(x, (h0_forward, c0_forward))
        h0_backward = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0_backward = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out_backward, _ = self.lstm_backward(torch.flip(x, [1]), (h0_backward, c0_backward))
        out_bidirectional = torch.cat((out_forward, torch.flip(out_backward, [1])), dim=2)
        out = self.fc(out_bidirectional[:, -1, :])
        
        return out

In [8]:
input_size = 187
hidden_size = 32
num_layers = 1
output_size = 1
train_loader = trn_dl
test_loader = tst_dl
num_epochs = 30
learning_rate = 0.01

In [9]:
model = BiLSTM(input_size, hidden_size, num_layers, output_size).to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(train_loader):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        labels = labels.float()
        labels = labels.unsqueeze(1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_loader)}")

model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted = (outputs > 0.5).float().squeeze()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print('Accuracy: {:.2f}%'.format(accuracy))

Epoch 1, Loss: 0.5100858211517334
Epoch 2, Loss: 0.3967459434416236
Epoch 3, Loss: 0.3581413670283992
Epoch 4, Loss: 0.3227523194580543
Epoch 5, Loss: 0.28520053079942376
Epoch 6, Loss: 0.25312486763407543
Epoch 7, Loss: 0.22546398966777614
Epoch 8, Loss: 0.2032423782639387
Epoch 9, Loss: 0.18131099932077455
Epoch 10, Loss: 0.16713823995939114
Epoch 11, Loss: 0.15464536754823313
Epoch 12, Loss: 0.13993676406581226
Epoch 13, Loss: 0.1293174032031036
Epoch 14, Loss: 0.12189026631233169
Epoch 15, Loss: 0.11399421931766882
Epoch 16, Loss: 0.11612356944781978
Epoch 17, Loss: 0.11049552697960924
Epoch 18, Loss: 0.09237897505120533
Epoch 19, Loss: 0.08301201944307583
Epoch 20, Loss: 0.08480274949858828
Epoch 21, Loss: 0.07862269496772348
Epoch 22, Loss: 0.07194054508354605
Epoch 23, Loss: 0.0712899642019737
Epoch 24, Loss: 0.07174213921151511
Epoch 25, Loss: 0.06014017124728459
Epoch 26, Loss: 0.0603981919404937
Epoch 27, Loss: 0.055510603890913286
Epoch 28, Loss: 0.05319010193754987
Epoch 29

In [10]:
model.eval()
predictions = []
ground_truth = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted = (outputs > 0.5).float().squeeze()
        predictions.extend(predicted.cpu().numpy())
        ground_truth.extend(labels.cpu().numpy())

predictions = np.array(predictions)
ground_truth = np.array(ground_truth)

precision = precision_score(ground_truth, predictions, average='binary')  # assuming binary classification
recall = recall_score(ground_truth, predictions, average='binary')  # assuming binary classification
f1 = (2*precision*recall)/(precision+recall)

print('Precision: {:.2f}'.format(precision))
print('Recall: {:.2f}'.format(recall))
print('F1-Score: {:.2f}'.format(f1))

Precision: 0.98
Recall: 0.96
F1-Score: 0.97
