In [1]:
import matplotlib.pyplot as plt
from config import Config
from patient_data_reader import PatientReader
import os
import time
import numpy as np
import pandas as pd

import torch

In [2]:
# Number of units in the hidden (recurrent) layer
N_HIDDEN = 200
# Number of training sequences in each batch

# All gradients above this will be clipped
GRAD_CLIP = 100
# How often should we check the output?
EPOCH_SIZE = 100
# Number of epochs to train the net
num_epochs = 6

MAX_LENGTH = 300

# FLAGS.vocab_size = 619



N_BATCH = 128 # 128, 32, 1

EMBEDSIZE = 100
N_TOPICS = 50

In [32]:
# class Config:
#     """feel free to play with these hyperparameters during training"""
#     dataset = "resource"  # change this to the right data name
#     data_path = "%s" % dataset
#     checkpoint_dir = "checkpoint"
#     decay_rate = 0.95
#     decay_step = 1000
#     n_topics = 50
#     learning_rate = 0.00002
#     vocab_size = 619
#     n_stops = 22 
#     lda_vocab_size = vocab_size - n_stops
#     n_hidden = 200
#     n_layers = 2
#     projector_embed_dim = 100
#     generator_embed_dim = n_hidden
#     dropout = 1.0
#     max_grad_norm = 1.0 #for gradient clipping
#     total_epoch = 5
#     init_scale = 0.075
#     threshold = 0.5 #probability cut-off for predicting label to be 1
#     forward_only = False #indicates whether we are in testing or training mode
#     log_dir = 'logs'

<config.Config at 0x19f544be2e0>

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
def prepare_data(seqs, labels, vocabsize, maxlen=None):
    """Create the matrices from the datasets.

    This pad each sequence to the same lenght: the lenght of the
    longuest sequence or maxlen.

    if maxlen is set, we will cut all sequence to this maximum
    lenght.

    This swap the axis!
    """
    # x: a list of sentences
    lengths = [len(s) for s in seqs]

    eventSeq = []

    for seq in seqs:
        t = []
        for visit in seq:
            t.extend(visit)
        eventSeq.append(t)
    eventLengths = [len(s) for s in eventSeq]

    if maxlen is not None:
        new_seqs = []
        new_lengths = []
        new_labels = []
        for l, s, la in zip(lengths, seqs, labels):
            if l < maxlen:
                new_seqs.append(s)
                new_lengths.append(l)
                new_labels.append(la)
            else:
                new_seqs.append(s[:maxlen])
                new_lengths.append(maxlen)
                new_labels.append(la[:maxlen])
        lengths = new_lengths
        seqs = new_seqs
        labels = new_labels

        if len(lengths) < 1:
            return None, None, None

    n_samples = len(seqs)
    maxlen = max(maxlen, np.max(lengths)) # changed this line to always to goto max_len as we use in pytroch with batches

    x = np.zeros((n_samples, maxlen, vocabsize)).astype('int64')
    x_mask = np.zeros((n_samples, maxlen)).astype(float)
    y = np.ones((n_samples, maxlen)).astype(float)
    for idx, s in enumerate(seqs):
        x_mask[idx, :lengths[idx]] = 1
        for j, sj in enumerate(s):
            for tsj in sj:
                x[idx, j, tsj - 1] = 1
    for idx, t in enumerate(labels):
        y[idx, :lengths[idx]] = t
        # if lengths[idx] < maxlen:
        #     y[idx,lengths[idx]:] = t[-1]
    
#     # randomly generated list of labels. for testing. note that this size is n_samples,1 and not n_samples,n_visits
#     y = torch.randint(0, 2, (n_samples,)) #.astype(float)
    return x, x_mask, y, lengths, eventLengths

In [5]:
import random
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [6]:
class CustomDataset(Dataset):
    def __init__(self, seqs, hfs):
        self.x = seqs
        self.y = hfs
    def __len__(self):
        return len(self.x)
    def __getitem__(self, index):
        return (self.x[index], self.y[index])
    
FLAGS = Config()
data_sets = PatientReader(FLAGS)

def get_custom_dataset(dtype):
    """ dtype in train, valid, test"""
    X_raw_data, Y_raw_data = data_sets.get_data_from_type(dtype)
    dataset = CustomDataset(X_raw_data, Y_raw_data)
    print(f"legth of dataset of dtype = {dtype}:", len(X_raw_data))
    return dataset

train_dataset = get_custom_dataset("train")
val_dataset = get_custom_dataset("valid")
test_dataset =get_custom_dataset("test")

 [*] load resource\vocab.pkl
 [*] load resource/X_train.pkl
 [*] load resource/Y_train.pkl
 [*] load resource/X_valid.pkl
 [*] load resource/Y_valid.pkl
 [*] load resource/X_test.pkl
 [*] load resource/Y_test.pkl
vocabulary size: 619
number of training documents: 2000
number of validation documents: 500
number of testing documents: 500
legth of dataset of dtype = train: 2000
legth of dataset of dtype = valid: 500
legth of dataset of dtype = test: 500


In [7]:
def collate_fn(data):
    """
     Collate the the list of samples into batches. For each patient, you need to pad the diagnosis
        sequences to the sample shape (max # visits, max # diagnosis codes). The padding infomation
        is stored in `mask`.
    
    Arguments:
        data: a list of samples fetched from `CustomDataset`
        
    Outputs:
        x: a tensor of shape (# patiens, max # visits, max # diagnosis codes) of type torch.long
        masks: a tensor of shape (# patiens, max # visits, max # diagnosis codes) of type torch.bool
        rev_x: same as x but in reversed time. This will be used in our RNN model for masking 
        rev_masks: same as mask but in reversed time. This will be used in our RNN model for masking
        y: a tensor of shape (# patiens) of type torch.float
        
    Note that you can obtains the list of diagnosis codes and the list of hf labels
        using: `sequences, labels = zip(*data)`
    """

    sequences, labels = zip(*data)

    x, x_mask, y, lengths, eventLengths = prepare_data(seqs=sequences, labels=labels, vocabsize=FLAGS.vocab_size, maxlen=MAX_LENGTH)
    
    x = torch.tensor(x, dtype=torch.float)
    x_mask = torch.tensor(x_mask, dtype=torch.bool)
    y = torch.tensor(y, dtype=torch.float)

    return x, x_mask, y, lengths, eventLengths


In [8]:
# from torch.utils.data.dataset import random_split

# split = int(len(dataset)*0.5)

# lengths = [split, len(dataset) - split]
# train_dataset, val_dataset = random_split(dataset, lengths)

# print("Length of train dataset:", len(train_dataset))
# print("Length of val dataset:", len(val_dataset))


In [9]:
from torch.utils.data import DataLoader

def load_data(train_dataset, val_dataset,test_dataset, collate_fn,batch_size=128):
    
    '''
    Implement this function to return the data loader for  train and validation dataset. 
    Set batchsize to batch_size. Set `shuffle=True` only for train dataloader.
    
    Arguments:
        train dataset: train dataset of type `CustomDataset`
        val dataset: validation dataset of type `CustomDataset`
        test dataset: test dataset of type `CustomDataset`
        
        collate_fn: collate function
        
    Outputs:
        train_loader, val_loader, test_dataset : train and validation and test dataloaders
    
    Note that you need to pass the collate function to the data loader `collate_fn()`.
    '''
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
    return train_loader, val_loader, test_loader

train_loader, val_loader, test_loader = load_data(train_dataset, val_dataset,test_dataset, collate_fn,batch_size = N_BATCH)

In [10]:
def get_last_visit(hidden_states, masks):
#     print(hidden_states.shape)  # torch.Size([32, 175, 256])

#     print(masks.shape) #torch.Size([32, 175])
#     """
#      obtain the hidden state for the last true visit (not padding visits)

#     Arguments:
#         hidden_states: the hidden states of each visit of shape (batch_size, # visits, embedding_dim)
#         masks: the padding masks of shape (batch_size, # visits, # diagnosis codes)

#     Outputs:
#         last_hidden_state: the hidden state for the last true visit of shape (batch_size, embedding_dim)
        
#     NOTE: DO NOT use for loop.
    
#     HINT: First convert the mask to a vector of shape (batch_size,) containing the true visit length; 
#           and then use this length vector as index to select the last visit.
#     """

    mask_length = masks.count_nonzero(dim=1)
    return hidden_states[range(hidden_states.shape[0]),mask_length-1,:]

# GRU Net Model

In [11]:
# input = torch.randn(batch_size, sequence_length, input_size)

print_flag = False
class GRUModel(nn.Module):
    def __init__(self):
        super(GRUModel, self).__init__()

        self.gru = nn.GRU(input_size=FLAGS.vocab_size, hidden_size=N_HIDDEN, batch_first=True)
        self.fc = nn.Linear(in_features= N_HIDDEN, out_features=MAX_LENGTH)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x, masks):
        if print_flag: print( "x" ,x.shape)
        if print_flag: print( "masks",masks.shape)
        batch_size = x.shape[0]
        if print_flag: print( "batch_size", batch_size)
        output, h_n = self.gru(x)
        if print_flag: print( "output", output.shape)
        if print_flag: print( "h_n", h_n.shape)
        true_h_n = get_last_visit(output, masks)
        if print_flag: print( "true_h_n",true_h_n.shape)
        logits = self.fc(true_h_n)   
        if print_flag: print( "logits",logits.shape)
        probs = self.sigmoid(logits)
        if print_flag: print( "probs",probs.shape)
        probs_ret = probs.view((batch_size,-1))
        if print_flag: print( "probs_ret",probs_ret.shape)
        return probs_ret
    
## H0 defaults to zeros if not provided.
#     def initHidden(self):
#         return torch.zeros(1, N_HIDDEN)

In [12]:
naive_rnn = GRUModel()
naive_rnn

GRUModel(
  (gru): GRU(619, 200, batch_first=True)
  (fc): Linear(in_features=200, out_features=300, bias=True)
  (sigmoid): Sigmoid()
)

In [13]:
train_iter = iter(train_loader)
x, x_mask, y, lengths, eventLengths = next(train_iter)
x.shape, y.shape

(torch.Size([128, 300, 619]), torch.Size([128, 300]))

In [14]:
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(naive_rnn.parameters(), lr=0.001)

In [15]:
from sklearn.metrics import precision_recall_fscore_support, roc_auc_score


def eval_model(model, data_loader):
    
    """
    evaluate the model.
    
    Arguments:
        model: the RNN model
        val_loader: validation dataloader
        
    Outputs:
        precision: overall precision score
        recall: overall recall score
        f1: overall f1 score
        roc_auc: overall roc_auc score
        
    Note that please pass all four arguments to the model so that we can use this function for both 
    models. (Use `model(x, masks, rev_x, rev_masks)`.)
        
    HINT: checkout https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics
    """
    
    model.eval()
    y_pred = torch.LongTensor()
    y_score = torch.Tensor()
    y_true = torch.LongTensor()
    model.eval()
    for x, x_mask, y, lengths, eventLengths in data_loader:
        y_hat = model(x, x_mask)
        y_score = torch.cat((y_score,  y_hat.detach().to(device)), dim=0)
        y_hat = (y_hat > 0.5).int()
        y_pred = torch.cat((y_pred,  y_hat.detach().to(device)), dim=0)
        y_true = torch.cat((y_true, y.detach().to(device)), dim=0)
    """
        Calculate precision, recall, f1, and roc auc scores.
        Use `average='binary'` for calculating precision, recall, and fscore.
    """
#     print(y_pred.shape, y_true.shape)
    p, r, f, _ = precision_recall_fscore_support(torch.flatten(y_true), (np.array(y_pred)>0.5).flatten(), average='binary')
    roc_auc = roc_auc_score(torch.flatten(y_true), torch.flatten(y_score))
    return p, r, f, roc_auc

In [16]:
def train(model, train_loader, val_loader, n_epochs):
    """
    train the model.
    
    Arguments:
        model: the RNN model
        train_loader: training dataloder
        val_loader: validation dataloader
        n_epochs: total number of epochs
        
    You need to call `eval_model()` at the end of each training epoch to see how well the model performs 
    on validation data.
        
    Note that please pass all four arguments to the model so that we can use this function for both 
    models. (Use `model(x, masks, rev_x, rev_masks)`.)
    """
    
    for epoch in range(n_epochs):
        model.train()
        train_loss = 0
        for x, x_mask, y, lengths, eventLengths in train_loader:
            """
                1. zero grad
                2. model forward
                3. calculate loss
                4. loss backward
                5. optimizer step
            """
            outputs = model(x, x_mask)
#             print("outputs",outputs.shape)
#             print("y",y.shape)
            loss = criterion(outputs, y) 
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss = train_loss / len(train_loader)
        print('Epoch: {} \t Training Loss: {:.6f}'.format(epoch+1, train_loss))
        p, r, f, roc_auc = eval_model(model, val_loader)
        print('Epoch: {} \t Validation p: {:.2f}, r:{:.2f}, f: {:.2f}, roc_auc: {:.2f}'
              .format(epoch+1, p, r, f, roc_auc))

In [17]:
n_epochs = num_epochs
train(naive_rnn, train_loader, val_loader, n_epochs)

Epoch: 1 	 Training Loss: 0.559083
Epoch: 1 	 Validation p: 0.91, r:0.92, f: 0.92, roc_auc: 0.90
Epoch: 2 	 Training Loss: 0.285555
Epoch: 2 	 Validation p: 0.91, r:0.92, f: 0.92, roc_auc: 0.92
Epoch: 3 	 Training Loss: 0.273508
Epoch: 3 	 Validation p: 0.91, r:0.92, f: 0.92, roc_auc: 0.92
Epoch: 4 	 Training Loss: 0.271554
Epoch: 4 	 Validation p: 0.91, r:0.92, f: 0.92, roc_auc: 0.92
Epoch: 5 	 Training Loss: 0.270376
Epoch: 5 	 Validation p: 0.92, r:0.92, f: 0.92, roc_auc: 0.93
Epoch: 6 	 Training Loss: 0.269131
Epoch: 6 	 Validation p: 0.92, r:0.92, f: 0.92, roc_auc: 0.93


In [18]:
p, r, f, roc_auc = eval_model(naive_rnn, val_loader)
print(roc_auc)
assert roc_auc > 0.7, "ROC AUC is too low on the validation set (%f < 0.7)"%(roc_auc)


p, r, f, roc_auc = eval_model(naive_rnn, test_loader)
print(roc_auc)
assert roc_auc > 0.7, "ROC AUC is too low on the test set (%f < 0.7)"%(roc_auc)


0.9260353617118455
0.9255035281296328


In [19]:
p, r, f, roc_auc

(0.9219502761966419,
 0.9222526871772444,
 0.9221014568923833,
 0.9255035281296328)

# Recognition Net  Model

In [24]:
class RecognitionNet(nn.Module):
    def __init__(self):
        super(RecognitionNet, self).__init__()
        
        self.fc1 = nn.Linear(1473, 64)
        
        l_in = lasagne.layers.InputLayer(shape=(N_BATCH, MAX_LENGTH, N_VOCAB))
        l_in = nn.Sequential(
            nn.Linear(in_features=N_VOCAB, out_features=N_VOCAB),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=N_VOCAB, out_features=MAX_LENGTH * N_VOCAB),
            nn.Reshape(N_BATCH, MAX_LENGTH, N_VOCAB)
        )

In [None]:
# Chat gpt output
import torch
import torch.nn as nn
import torch.nn.functional as F


class ThetaLayer(nn.Module):
    def __init__(self, mu_layer, logsigma_layer, maxlen):
        super().__init__()
        self.mu = mu_layer
        self.logsigma = logsigma_layer
        self.klterm = 0
        self.theta = nn.Parameter(torch.zeros(1, logsigma_layer.out_features))
        self.maxlen = maxlen

    def forward(self, x):
        logsigma_in = self.logsigma(x[0])
        mu_in = self.mu(x[1])
        kltermFn = lambda logsigma, mu: 0.5 * (1 + logsigma * 2 - (mu ** 2) - torch.exp(logsigma) ** 2)
        self.klterm = (0.5 * (1 + logsigma_in * 2) - (mu_in ** 2) - (torch.exp(logsigma_in) ** 2))
        out = lambda mu, logsigma, input: (1 / (input * (torch.exp(logsigma) * (2 * np.pi) ** (1 / 2)))) * torch.exp(
            -((torch.log(input) - mu) ** 2) / (2 * (torch.exp(logsigma) ** 2)))
        self.theta = nn.Parameter(
            out(mu_in, logsigma_in, x).reshape(x[0].size(0), self.maxlen, self.logsigma.out_features))
        return self.theta
    
    
    
    
class ExpressionLayer(nn.Module):
    def __init__(self, expression, output_shape):
        super(ExpressionLayer, self).__init__()
        self.expression = expression
        self.output_shape_ = output_shape

    def forward(self, x):
        return self.expression(x)

    def output_shape(self, input_shape):
        return (input_shape[0],) + self.output_shape_

In [None]:
# Chat gpt output
class Net(nn.Module):
    def __init__(self, num_inputs, num_hidden, num_topics, max_length):
        super(Net, self).__init__()
        
        self.l_in = nn.Linear(num_inputs, num_hidden)
        self.l_1 = nn.Linear(num_hidden, N_HIDDEN)
        self.l_2 = nn.Linear(N_HIDDEN, N_HIDDEN)
        self.mu = nn.Linear(N_HIDDEN, n_topics)
        self.log_sigma = nn.Linear(N_HIDDEN, n_topics)
        self.l_theta = ThetaLayer([self.mu, self.log_sigma], maxlen=MAX_LENGTH) 
        
        self.l_B = nn.Linear(num_inputs, n_topics)
        self.l_context = nn.Multiplication()
        
        #####
        #####
        #### Yiming at 2023-04-09 HUGE BUG!!!!
        self.l_context = nn.Lambda(lambda X: torch.mean(X, dim=-1))
        
        self.l_forward0 = nn.Linear(num_hidden, 1)
        self.l_dense0 = nn.Linear(num_hidden, 1)
        self.l_dense1 = nn.Flatten()
        self.l_dense = nn.Add()
        self.l_out0 = nn.Sigmoid()
        self.l_out = nn.Lambda(lambda X: X + 0.000001)

    def forward(self, x, mask):
        # Compute the network's forward pass
        l_in = self.l_in(x)
        l_1 = nn.ReLU()(l_in)
        l_2 = nn.ReLU()(l_1)
        mu = self.mu(l_2)
        log_sigma = self.log_sigma(l_2)
        l_theta = ThetaLayer([mu, log_sigma], maxlen=MAX_LENGTH) 

        l_B = self.l_B(x)
        l_context = self.l_context([l_B, l_theta])
        
        l_forward0 = self.l_forward0(l_2)
        l_dense0 = self.l_dense0(l_2)
        l_dense1 = self.l_dense1(l_forward0)
        l_dense = self.l_dense([l_dense1, l_context])
        l_out0 = self.l_out0(l_dense)
        l_out = self.l_out([l_out0, mask])
        
        return l_out

# The CONTENT Model

In [29]:
class CONTENT(nn.Module):
    def __init__(self):
        super(CONTENT, self).__init__()
        
        self.gru = GRUModel()
        self.recog = RecognitionNet()
        self.log_reg = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        gru_out = self.gru(x)
        recog_out = self.recog(x)
        z_out =  W * gru_out + B * recog_out
        z_out = self.linear(z_out)
        y_pred = torch.sigmoid(z_out)
        return y_pred

In [30]:
content_model = CONTENT()

NameError: name 'lasagne' is not defined

In [31]:
criterion = nn.BCELoss()
learning_rate = 0.001
optimizer = torch.optim.Adam(content_model.parameters(),lr=learning_rate)

NameError: name 'content_model' is not defined

In [None]:
# chatgpt output
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class ThetaLayer(nn.Module):
    def __init__(self, mu, log_sigma, maxlen):
        super(ThetaLayer, self).__init__()
        self.mu = mu
        self.log_sigma = log_sigma
        self.maxlen = maxlen
        self.klterm = None

    def forward(self, x):
        self.mu = F.dropout(self.mu, p=0.5, training=self.training)
        self.log_sigma = F.dropout(self.log_sigma, p=0.5, training=self.training)
        epsilon = torch.randn(self.mu.size(), device=x.device)
        z = self.mu + torch.exp(self.log_sigma / 2) * epsilon
        self.klterm = -0.5 * torch.mean(
            torch.sum(1 + 2 * self.log_sigma - self.mu.pow(2) - torch.exp(2 * self.log_sigma), dim=2), dim=1)
        theta = F.pad(z, (0, self.maxlen - z.size()[1], 0, 0))
        return theta

class Model(nn.Module):
    def __init__(self, vocab_size, embed_size, n_hidden, n_topics, maxlen):
        super(Model, self).__init__()
        self.embed = nn.Linear(vocab_size, embed_size, bias=False)
        self.gru = nn.GRU(embed_size, n_hidden, bidirectional=False, batch_first=True)
        self.dense1 = nn.Linear(vocab_size, n_hidden)
        self.dense2 = nn.Linear(n_hidden, n_hidden)
        self.mu = nn.Linear(n_hidden, n_topics)
        self.log_sigma = nn.Linear(n_hidden, n_topics)
        self.theta = ThetaLayer(None, None, maxlen)
        self.B = nn.Linear(vocab_size, n_topics, bias=False)
        self.context = nn.Sequential(nn.Linear(n_topics, 1, bias=False), nn.Flatten(1))

    def forward(self, x, mask):
        x_embed = self.embed(x)
        h, _ = self.gru(x_embed)
        h_masked = h * mask[:, :, None]
        h_masked = h_masked.reshape((-1, h_masked.size()[2]))
        h1 = F.relu(self.dense1(x))
        h2 = F.relu(self.dense2(h1))
        mu = self.mu(h2)
        log_sigma = self.log_sigma(h2)
        theta = self.theta(x)
        B = self.B(x)
        context = self.context(B)
        dense = self.context(h_masked) + context.unsqueeze(1)
        out0 = torch.sigmoid(dense)
        out = out0 * mask[:, :, None] + 0.000001
        return out

# Define hyperparameters
N_BATCH = 1
MAX_LENGTH = 100
N_VOCAB = 10000
EMBED_SIZE = 100
N_HIDDEN = 256
N_TOPICS = 50
LEARNING_RATE = 0.001

# Define model and optimizer
model = Model(N_VOCAB, EMBED_SIZE, N_HIDDEN, N_TOPICS, MAX_LENGTH)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Define training function
def train_fn(input_var, target_values, mask_var):
    optimizer.zero_grad()
    input_tensor = torch.tensor(input_var, dtype=torch.float32)
    target_tensor = torch.tensor(target_values, dtype=torch.float32)
    mask_tensor = torch.tensor(mask_var, dtype=torch.float32)
    output = model(input_tensor, mask_tensor)
   



In [None]:
# chat gpt output


import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import roc_auc_score, precision_recall_fscore_support, accuracy_score
import time
from tqdm import tqdm

# define the model architecture
class Model(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(Model, self).__init__()
        self.rnn = nn.GRU(input_size=input_dim, hidden_size=hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, mask):
        x = self.rnn(x)[0]
        x = x * mask.unsqueeze(-1)
        x = x.sum(dim=1) / mask.sum(dim=1).unsqueeze(-1)
        x = self.fc(x)
        x = self.sigmoid(x)
        return x

# define the training function
def train(model, optimizer, criterion, x, y, mask):
    optimizer.zero_grad()
    output = model(x, mask)
    loss = criterion(output.flatten(), y)
    loss.backward()
    optimizer.step()
    return loss.item()

# define the function to compute cost
def compute_cost(model, x, y, mask):
    output = model(x, mask)
    loss = nn.BCELoss()(output.flatten(), y)
    return loss.item()

# define the function to output the hidden state
def output_theta(model, x, mask):
    with torch.no_grad():
        model.eval()
        output, hn = model.rnn(x)
        output = output * mask.unsqueeze(-1)
        output = output.sum(dim=1) / mask.sum(dim=1).unsqueeze(-1)
    return hn.detach().numpy().reshape(x.shape[0], -1), output.detach().numpy()

# define the function to iterate through minibatches
def iterate_minibatches_listinputs(inputs_list, batch_size, shuffle=False):
    if shuffle:
        indices = np.arange(len(inputs_list[0]))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs_list[0]), batch_size):
        end_idx = start_idx + batch_size
        if end_idx > len(inputs_list[0]):
            end_idx = len(inputs_list[0])
        if shuffle:
            excerpt = indices[start_idx:end_idx]
        else:
            excerpt = slice(start_idx, end_idx)
        yield [torch.from_numpy(inputs[excerpt]).float() for inputs in inputs_list]

# set the seed for reproducibility
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)

# load the data
trainingAdmiSeqs = np.load("trainingAdmiSeqs.npy")
trainingLabels = np.load("trainingLabels.npy")
trainingMask = np.load("trainingMask.npy")
test_admiSeqs = np.load("test_admiSeqs.npy")
test_labels = np.load("test_labels.npy")
test_mask = np.load("test_mask.npy")
testLengths = np.load("testLengths.npy")

# set the hyperparameters
num_epochs = 10
batch_size = 128
input_dim = trainingAdmiSeqs.shape[-1]
hidden_dim = 128
output_dim = 1
lr = 0.001

# create the model, optimizer and criterion
model = Model(input_dim, hidden_dim, output_dim)
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.BCELoss()

# train the model
for epoch in range(num_epochs):
    train_err = 0
    train_batches = 0
   

In [None]:
# chat gpt output


import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import roc_auc_score, precision_recall_fscore_support, accuracy_score
import time
from tqdm import tqdm

# define the model architecture
class Model(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(Model, self).__init__()
        self.rnn = nn.GRU(input_size=input_dim, hidden_size=hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, mask):
        x = self.rnn(x)[0]
        x = x * mask.unsqueeze(-1)
        x = x.sum(dim=1) / mask.sum(dim=1).unsqueeze(-1)
        x = self.fc(x)
        x = self.sigmoid(x)
        return x

# define the training function
def train(model, optimizer, criterion, x, y, mask):
    optimizer.zero_grad()
    output = model(x, mask)
    loss = criterion(output.flatten(), y)
    loss.backward()
    optimizer.step()
    return loss.item()

# define the function to compute cost
def compute_cost(model, x, y, mask):
    output = model(x, mask)
    loss = nn.BCELoss()(output.flatten(), y)
    return loss.item()

# define the function to output the hidden state
def output_theta(model, x, mask):
    with torch.no_grad():
        model.eval()
        output, hn = model.rnn(x)
        output = output * mask.unsqueeze(-1)
        output = output.sum(dim=1) / mask.sum(dim=1).unsqueeze(-1)
    return hn.detach().numpy().reshape(x.shape[0], -1), output.detach().numpy()

# define the function to iterate through minibatches
def iterate_minibatches_listinputs(inputs_list, batch_size, shuffle=False):
    if shuffle:
        indices = np.arange(len(inputs_list[0]))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs_list[0]), batch_size):
        end_idx = start_idx + batch_size
        if end_idx > len(inputs_list[0]):
            end_idx = len(inputs_list[0])
        if shuffle:
            excerpt = indices[start_idx:end_idx]
        else:
            excerpt = slice(start_idx, end_idx)
        yield [torch.from_numpy(inputs[excerpt]).float() for inputs in inputs_list]

# set the seed for reproducibility
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)

# load the data
trainingAdmiSeqs = np.load("trainingAdmiSeqs.npy")
trainingLabels = np.load("trainingLabels.npy")
trainingMask = np.load("trainingMask.npy")
test_admiSeqs = np.load("test_admiSeqs.npy")
test_labels = np.load("test_labels.npy")
test_mask = np.load("test_mask.npy")
testLengths = np.load("testLengths.npy")

# set the hyperparameters
num_epochs = 10
batch_size = 128
input_dim = trainingAdmiSeqs.shape[-1]
hidden_dim = 128
output_dim = 1
lr = 0.001

# create the model, optimizer and criterion
model = Model(input_dim, hidden_dim, output_dim)
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.BCELoss()

# train the model
for epoch in range(num_epochs):
    train_err = 0
    train_batches = 0
   