In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os

TABLES_DIRECTORY = "../../Data/giant_tables"

In [2]:
patient_hup_ids = []
# Iterate through all files in TABLES_DIRECTORY
for filename in os.listdir(TABLES_DIRECTORY):
    # Only look at filename that are .csv files and does not begin with .
    if filename.endswith(".csv") and not filename.startswith("."):
        # Get the patient_hup_id from the filename which is after _ and before .
        patient_hup_id = filename.split("_")[1].split(".")[0]
        patient_hup_ids.append(patient_hup_id)

patient_hup_ids = sorted(patient_hup_ids)
len(patient_hup_ids)

62

In [3]:
from sklearn.preprocessing import MinMaxScaler


def preprocess_data(data, lookback=5):
    X, Y = [], []
    for i in range(len(data) - lookback - 1):
        X.append(data[i : (i + lookback)])
        Y.append(data["num_seizures"][i + lookback] > 0)

    # scale features between 0 and 1
    scaler = MinMaxScaler(feature_range=(0, 1))
    X = np.array(X)
    X = scaler.fit_transform(X.reshape(-1, X.shape[-1])).reshape(X.shape)

    # labels are already binary, so no need to scale
    Y = np.array(Y)
    return X, Y

In [4]:
import torch
import torch.nn as nn


class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        out = self.sigmoid(out)
        return out

In [5]:
class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(GRUModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.gru(x, h0)
        out = self.fc(out[:, -1, :])
        out = self.sigmoid(out)
        return out

In [6]:
# Assume device is already defined
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [22]:
import torch.optim as optim

# Hyperparameters
input_size = 30  # adjust as necessary
hidden_size = 50
num_layers = 2
output_size = 1
num_epochs = 100
learning_rate = 0.001
batch_size = 20

for patient_hup_id in patient_hup_ids:
    # Read in the giant table for this patient
    hourly_patient_features_df = pd.read_csv(
        os.path.join(TABLES_DIRECTORY, f"HUP_{patient_hup_id}.csv")
    )

    # Fill NaN values with 0
    hourly_patient_features_df = hourly_patient_features_df.fillna(0)

    # Preprocess the data
    X, Y = preprocess_data(hourly_patient_features_df)

    # Split the data into training and testing sets (you can adjust the proportions as needed)
    train_proportion = 0.3
    train_index = int(len(X) * train_proportion)
    X_train, X_test = X[:train_index], X[train_index:]
    Y_train, Y_test = Y[:train_index], Y[train_index:]

    # Convert the data into PyTorch tensors
    X_train = torch.tensor(X_train, dtype=torch.float32)
    Y_train = torch.tensor(Y_train, dtype=torch.float32)
    X_test = torch.tensor(X_test, dtype=torch.float32)
    Y_test = torch.tensor(Y_test, dtype=torch.float32)

    # Create dataloaders
    train_data = torch.utils.data.TensorDataset(X_train, Y_train)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size)

    # Define the models
    lstm_model = LSTMModel(input_size, hidden_size, num_layers, output_size).to(device)
    gru_model = GRUModel(input_size, hidden_size, num_layers, output_size).to(device)

    # Define the loss and optimizer
    criterion = nn.BCELoss()
    lstm_optimizer = optim.Adam(lstm_model.parameters(), lr=learning_rate)
    gru_optimizer = optim.Adam(gru_model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        for i, (features, labels) in enumerate(train_loader):
            features = features.to(device)
            labels = labels.to(device).view(
                -1, 1
            )  # reshape labels to match output shape

            # Forward pass and loss computation
            lstm_outputs = lstm_model(features)
            lstm_loss = criterion(lstm_outputs, labels)

            gru_outputs = gru_model(features)
            gru_loss = criterion(gru_outputs, labels)

            # Backward pass and optimization
            lstm_optimizer.zero_grad()
            lstm_loss.backward()
            lstm_optimizer.step()

            gru_optimizer.zero_grad()
            gru_loss.backward()
            gru_optimizer.step()

            if (i + 1) % 100 == 0:
                print(
                    f"Epoch {epoch+1}/{num_epochs}, Step {i+1}/{len(train_loader)}, LSTM Loss: {lstm_loss.item()}, GRU Loss: {gru_loss.item()}"
                )

    # Testing the models (you can use an appropriate performance metric such as accuracy, F1 score, AUC-ROC, etc.)
    with torch.no_grad():
        lstm_outputs = (lstm_model(X_test.to(device)) > 0.5).cpu().numpy()
        gru_outputs = (gru_model(X_test.to(device)) > 0.5).cpu().numpy()

        lstm_accuracy = np.mean(lstm_outputs == Y_test.numpy())
        gru_accuracy = np.mean(gru_outputs == Y_test.numpy())

        print(
            f"Patient {patient_hup_id}, LSTM Accuracy: {lstm_accuracy}, GRU Accuracy: {gru_accuracy}"
        )

Patient 138, LSTM Accuracy: 0.9565217391304348, GRU Accuracy: 0.9565217391304348
Patient 140, LSTM Accuracy: 0.9772727272727273, GRU Accuracy: 0.9772727272727273
Patient 141, LSTM Accuracy: 0.7916666666666666, GRU Accuracy: 0.49166666666666664
