In [1]:
"""

This notebook is a Pytorch implementation of the CONTENT model as presented in
the paper 
"Readmission prediction via deep contextual embedding of clinical concepts" 
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5890980/pdf/pone.0195024.pdf

Original code is available at https://github.com/danicaxiao/CONTENT

authors: Gowtham Kuntumalla, Yiming Li
date: April, 2023
"""

'\n\nThis notebook is a Pytorch implementation of the CONTENT model as presented in\nthe paper \n"Readmission prediction via deep contextual embedding of clinical concepts" \nhttps://www.ncbi.nlm.nih.gov/pmc/articles/PMC5890980/pdf/pone.0195024.pdf\n\nOriginal code is available at https://github.com/danicaxiao/CONTENT\n\nauthors: Gowtham Kuntumalla, Yiming Li\ndate: April, 2023\n'

In [2]:
# Basic imports
# futher imports done belo
import matplotlib.pyplot as plt
from patient_data_reader import PatientReader
import os
import time
import numpy as np
import pandas as pd

import torch

from sklearn.metrics import precision_recall_fscore_support, roc_auc_score, accuracy_score
from sklearn.metrics import average_precision_score as pr_auc_score



import random
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [3]:
from config import Config

# repeated here for ease of readability
class Config:
    """feel free to play with these hyperparameters during training"""
    dataset = "resource"  # change this to the right data name
    data_path = "%s" % dataset
    checkpoint_dir = "checkpoint"
    decay_rate = 0.95
    decay_step = 1000
    n_topics = 50
    learning_rate = 0.001 # 0.00002
    vocab_size = 619
    n_stops = 22 
    lda_vocab_size = vocab_size - n_stops
    n_hidden = 200
    n_layers = 2
    projector_embed_dim = 100
    generator_embed_dim = n_hidden
    dropout = 1.0
    max_grad_norm = 1.0 #for gradient clipping
    total_epoch = 6
    init_scale = 0.075
    threshold = 0.5 #probability cut-off for predicting label to be 1
    forward_only = False #indicates whether we are in testing or training mode
    log_dir = 'logs'
    
    
FLAGS = Config()

# Define hyperparameters
N_HIDDEN = FLAGS.n_hidden
NUM_EPOCHS = FLAGS.total_epoch
N_VOCAB = FLAGS.vocab_size
EMBED_SIZE = FLAGS.projector_embed_dim
N_HIDDEN = FLAGS.n_hidden
N_TOPICS = FLAGS.n_topics
LEARNING_RATE = FLAGS.learning_rate
THRESHOLD = FLAGS.threshold

N_BATCH = 1#1 # 128 # 128, 32, 1
MAX_LENGTH = 300


In [4]:
# useful for when GPU is available else use CPU as default.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
def prepare_data(seqs, labels, vocabsize, maxlen=None):
    """Create the matrices from the datasets.

    This pad each sequence to the same lenght: the length of the
    longuest sequence or maxlen.

    if maxlen is set, we will cut all sequence to this maximum
    lenght.

    This swap the axis!
    
    We modified some parts of this code from the original paper
    """
    # x: a list of sentences
    lengths = [len(s) for s in seqs]

    eventSeq = []

    for seq in seqs:
        t = []
        for visit in seq:
            t.extend(visit)
        eventSeq.append(t)
    eventLengths = [len(s) for s in eventSeq]

    if maxlen is not None:
        new_seqs = []
        new_lengths = []
        new_labels = []
        for l, s, la in zip(lengths, seqs, labels):
            if l < maxlen:
                new_seqs.append(s)
                new_lengths.append(l)
                new_labels.append(la)
            else:
                new_seqs.append(s[:maxlen])
                new_lengths.append(maxlen)
                new_labels.append(la[:maxlen])
        lengths = new_lengths
        seqs = new_seqs
        labels = new_labels

        if len(lengths) < 1:
            return None, None, None

    n_samples = len(seqs)
    maxlen = max(maxlen, np.max(lengths)) # changed this line to always to goto max_len as we use in pytroch with batches

    x = np.zeros((n_samples, maxlen, vocabsize)).astype('int64')
    x_mask = np.zeros((n_samples, maxlen)).astype(float)
    y = np.ones((n_samples, maxlen)).astype(float)
    for idx, s in enumerate(seqs):
        x_mask[idx, :lengths[idx]] = 1
        for j, sj in enumerate(s):
            for tsj in sj:
                x[idx, j, tsj - 1] = 1
    for idx, t in enumerate(labels):
        y[idx, :lengths[idx]] = t
        # if lengths[idx] < maxlen:
        #     y[idx,lengths[idx]:] = t[-1]
    
#     # randomly generated list of labels. for testing. note that this size is n_samples,1 and not n_samples,n_visits
#     y = torch.randint(0, 2, (n_samples,)) #.astype(float)
    return x, x_mask, y, lengths, eventLengths

In [6]:

# create a custom dataset for use in the pytorch pipelines.
class CustomDataset(Dataset):
    def __init__(self, seqs, hfs):
        self.x = seqs
        self.y = hfs
    def __len__(self):
        return len(self.x)
    def __getitem__(self, index):
        return (self.x[index], self.y[index])
    

# This patientreader class helps in reading the raw data and converting it to more amenable format.
data_sets = PatientReader(FLAGS)

def get_custom_dataset(dtype):
    """ dtype in train, valid, test"""
    X_raw_data, Y_raw_data = data_sets.get_data_from_type(dtype)
    dataset = CustomDataset(X_raw_data, Y_raw_data)
    print(f"legth of dataset of dtype = {dtype}:", len(X_raw_data))
    return dataset


# create train, test and validation
train_dataset = get_custom_dataset("train")
val_dataset = get_custom_dataset("valid")
test_dataset =get_custom_dataset("test")

 [*] load resource\vocab.pkl
 [*] load resource/X_train.pkl
 [*] load resource/Y_train.pkl
 [*] load resource/X_valid.pkl
 [*] load resource/Y_valid.pkl
 [*] load resource/X_test.pkl
 [*] load resource/Y_test.pkl
vocabulary size: 619
number of training documents: 2000
number of validation documents: 500
number of testing documents: 500
legth of dataset of dtype = train: 2000
legth of dataset of dtype = valid: 500
legth of dataset of dtype = test: 500


In [7]:
def collate_fn(data):
    """
    function required when visit lenghts are unequal. Masks are the tensors whcih store flags of real visits.
    used in dataloader.
    """
    sequences, labels = zip(*data)

    x, x_mask, y, lengths, eventLengths = prepare_data(seqs=sequences, labels=labels, vocabsize=N_VOCAB, maxlen=MAX_LENGTH)
    
    x = torch.tensor(x, dtype=torch.float)
    x_mask = torch.tensor(x_mask, dtype=torch.bool)
    y = torch.tensor(y, dtype=torch.float)

    return x, x_mask, y, lengths, eventLengths


In [8]:
# from torch.utils.data.dataset import random_split

# split = int(len(dataset)*0.5)

# lengths = [split, len(dataset) - split]
# train_dataset, val_dataset = random_split(dataset, lengths)

# print("Length of train dataset:", len(train_dataset))
# print("Length of val dataset:", len(val_dataset))


In [9]:
from torch.utils.data import DataLoader

def load_data(train_dataset, val_dataset,test_dataset, collate_fn,batch_size=128):
    
    '''
    Implement this function to return the data loader for  train and validation dataset. 
    Set batchsize to batch_size. Set `shuffle=True` only for train dataloader.
    
    Arguments:
        train dataset: train dataset of type `CustomDataset`
        val dataset: validation dataset of type `CustomDataset`
        test dataset: test dataset of type `CustomDataset`
        
        collate_fn: collate function
        
    Outputs:
        train_loader, val_loader, test_dataset : train and validation and test dataloaders
    '''
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
    return train_loader, val_loader, test_loader

train_loader, val_loader, test_loader = load_data(train_dataset, val_dataset,test_dataset, collate_fn,batch_size = N_BATCH)

In [10]:
def get_last_visit(hidden_states, masks):
    """
    In RNN, gettin the last hidden state form list of hidden states. 
    """
    mask_length = masks.count_nonzero(dim=1)
    return hidden_states[range(hidden_states.shape[0]),mask_length-1,:]

# GRU Net Model

In [11]:
# input = torch.randn(batch_size, sequence_length, input_size)

print_flag = False
class GRUModel(nn.Module):
    """
    GRU only model.
    """
    def __init__(self):
        super(GRUModel, self).__init__()
#         l_embed = lasagne.layers.DenseLayer(l_in, num_units=embedsize, b=None, num_leading_axes=2)
        self.embed = nn.Linear(N_VOCAB, EMBED_SIZE, bias=False)
        self.gru = nn.GRU(input_size=EMBED_SIZE, hidden_size=N_HIDDEN, batch_first=True)
        self.fc = nn.Linear(in_features= N_HIDDEN, out_features=MAX_LENGTH)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x, masks):
        # forward pass
        if print_flag: print( "x" ,x.shape)
        if print_flag: print( "masks",masks.shape)
        batch_size = x.shape[0]
        if print_flag: print( "batch_size", batch_size)
        x_embed = self.embed(x)
        if print_flag: print( "x_embed", x_embed.shape)
        output, h_n = self.gru(x_embed)
        if print_flag: print( "output", output.shape)
        if print_flag: print( "h_n", h_n.shape)
        true_h_n = get_last_visit(output, masks)
        if print_flag: print( "true_h_n",true_h_n.shape)
        logits = self.fc(true_h_n)   
        if print_flag: print( "logits",logits.shape)
        probs = self.sigmoid(logits)
        if print_flag: print( "probs",probs.shape)
        probs_ret = probs.view((batch_size,-1))
        if print_flag: print( "probs_ret",probs_ret.shape)
        return probs_ret
    
## H0 defaults to zeros if not provided.
#     def initHidden(self):
#         return torch.zeros(1, N_HIDDEN)

In [12]:
gru_rnn = GRUModel()
gru_rnn

GRUModel(
  (embed): Linear(in_features=619, out_features=100, bias=False)
  (gru): GRU(100, 200, batch_first=True)
  (fc): Linear(in_features=200, out_features=300, bias=True)
  (sigmoid): Sigmoid()
)

In [13]:
train_iter = iter(train_loader)
x, x_mask, y, lengths, eventLengths = next(train_iter)
x.shape, y.shape

(torch.Size([1, 300, 619]), torch.Size([1, 300]))

In [14]:
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(gru_rnn.parameters(), lr=LEARNING_RATE)

In [15]:
def eval_model(model, data_loader):
    
    """
    evaluate the model. GRU
    """
    def apply_mask(lengths, y):
        """
        for metrics need to somehow for each patient get only real n_visit not all of max_length.
            like new_testlabels.extend(inputs[1].flatten()[:leng])
        """
        result = []
        for i, l in enumerate(lengths):
            result.extend(y[i][:l])
        return result

    model.eval()
    y_pred = [] # torch.LongTensor()
    y_score = []#torch.Tensor()
    y_true = []#torch.LongTensor()
    model.eval()
    for x, x_mask, y, lengths, eventLengths in data_loader:
#         print("lengths: ", lengths)
        y_hat_prob = model(x, x_mask)
        y_score.extend(apply_mask(lengths, y_hat_prob.detach().to(device)))
        
        y_hat = (y_hat_prob > THRESHOLD).int()
#         print(np.shape(y_pred), np.shape(y_true))
        y_pred.extend(apply_mask(lengths, y_hat.detach().to(device)))
        y_true.extend(apply_mask(lengths, y.detach().to(device)))
#         print(np.shape(y_pred), np.shape(y_true))
#         print(y_pred[:2])
        
    """
        Calculate precision, recall, f1, and roc auc scores.
        Use `average='binary'` for calculating precision, recall, and fscore.
    """
    
#     print("after loop: ", np.shape(y_pred), np.shape(y_true))
    acc = accuracy_score(y_true, y_pred)
    p, r, f, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
    roc_auc = roc_auc_score(y_true, y_score)
    pr_auc = pr_auc_score(y_true, y_score)
    return acc, p, r, f, roc_auc, pr_auc

In [16]:
def train(model, train_loader, val_loader, n_epochs, criterion, optimizer):
    """
    train the model. GRU
    """
    
    for epoch in range(n_epochs):
        model.train()
        train_loss = 0
        for x, x_mask, y, lengths, eventLengths in train_loader:
            """
                1. zero grad
                2. model forward
                3. calculate loss
                4. loss backward
                5. optimizer step
            """
            outputs = model(x, x_mask)
#             print("outputs",outputs.shape)
#             print("y",y.shape)
            loss = criterion(outputs, y) 
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss = train_loss / len(train_loader)
        print('Epoch: {} \t Training Loss: {:.6f}'.format(epoch+1, train_loss))
        acc, p, r, f, roc_auc, pr_auc = eval_model(model, val_loader)
        print('Epoch: {} \t Validation acc: {:.2f}, p: {:.2f}, r:{:.2f}, f: {:.2f}, roc_auc: {:.2f}, pr_auc: {:.2f}'
              .format(epoch+1, acc, p, r, f, roc_auc, pr_auc))

In [17]:
# train(gru_rnn, train_loader, val_loader, NUM_EPOCHS, criterion, optimizer)

In [18]:
# acc, p, r, f, roc_auc,pr_auc = eval_model(gru_rnn, val_loader)
# print('Validation acc: {:.2f}, p: {:.2f}, r:{:.2f}, f: {:.2f}, roc_auc: {:.2f}, pr_auc: {:.2f}'
#               .format(acc, p, r, f, roc_auc, pr_auc))


# acc, p, r, f, roc_auc,pr_auc  = eval_model(gru_rnn, test_loader)
# print('Test acc: {:.2f}, p: {:.2f}, r:{:.2f}, f: {:.2f}, roc_auc: {:.2f}, pr_auc: {:.2f}'
#               .format(acc, p, r, f, roc_auc, pr_auc))

# The CONTENT Model

In [19]:
from sklearn.metrics import precision_recall_fscore_support, roc_auc_score 
from sklearn.metrics import average_precision_score as pr_auc


def eval_model_content(model, data_loader):
    
    """
    evaluate the model. content

    """
    def apply_mask(lengths, y):
        """
        for metrics need to somehow for each patient get only real n_visit not all of max_length.
            like new_testlabels.extend(inputs[1].flatten()[:leng])
        """
        result = []
        for i, l in enumerate(lengths):
            result.extend(y[i][:l])
        return result

    model.eval()
    y_pred = [] # torch.LongTensor()
    y_score = []#torch.Tensor()
    y_true = []#torch.LongTensor()
    model.eval()
    for x, x_mask, y, lengths, eventLengths in data_loader:
#         print("lengths: ", lengths)
        y_hat_prob, klterm = model(x, x_mask)
        y_score.extend(apply_mask(lengths, y_hat_prob.detach().to(device)))
        
        y_hat = (y_hat_prob > THRESHOLD).int()
#         print(np.shape(y_pred), np.shape(y_true))
        y_pred.extend(apply_mask(lengths, y_hat.detach().to(device)))
        y_true.extend(apply_mask(lengths, y.detach().to(device)))
#         print(np.shape(y_pred), np.shape(y_true))
#         print(y_pred[:2])
        
    """
        Calculate precision, recall, f1, and roc auc scores.
        Use `average='binary'` for calculating precision, recall, and fscore.
    """
    
#     print("after loop: ", np.shape(y_pred), np.shape(y_true))
    acc = accuracy_score(y_true, y_pred)
    p, r, f, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
    roc_auc = roc_auc_score(y_true, y_score)
    pr_auc = pr_auc_score(y_true, y_score)
    return acc, p, r, f, roc_auc, pr_auc

In [20]:
def train_content(model, train_loader, val_loader, n_epochs, criterion, optimizer):
    """
    train the model. using klterm

    """
    
    for epoch in range(n_epochs):
        model.train()
        train_loss = 0
        for x, x_mask, y, lengths, eventLengths in train_loader:
            """
                1. zero grad
                2. model forward
                3. calculate loss
                4. loss backward
                5. optimizer step
            """
            outputs, klterm = model(x, x_mask)
#             print("outputs",outputs.shape)
#             print("y",y.shape)
            loss = criterion(outputs, y) + klterm
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss = train_loss / len(train_loader)
        print('Epoch: {} \t Training Loss: {:.6f}'.format(epoch+1, train_loss))
        acc, p, r, f, roc_auc, pr_auc = eval_model_content(model, val_loader)
        print('Epoch: {} \t Validation acc: {:.2f}, p: {:.2f}, r:{:.2f}, f: {:.2f}, roc_auc: {:.2f}, pr_auc: {:.2f}'
              .format(epoch+1, acc, p, r, f, roc_auc, pr_auc))

In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class ThetaLayer(nn.Module):
    """
    Part of recognition network. Distribution.
    
    We didn't find this part of the code in the original author's code repository. 
    We used openAI's ChatGPT 3.5 to help generate intial version of this and then modified it further. 
    
    """
    def __init__(self, n_topics, max_length):
        super(ThetaLayer, self).__init__()
        self.n_topics = n_topics
        self.max_length = max_length
        self.klterm = None

    def forward(self, mu, log_sigma):
        self.klterm = -0.5 * torch.sum(1 + 2 * log_sigma - mu.pow(2) - torch.exp(log_sigma).pow(2)) / mu.size(0)
        eps = torch.randn(mu.size(0), self.max_length, self.n_topics).to(mu.device)
        theta = mu.unsqueeze(1) + torch.exp(log_sigma).unsqueeze(1) * eps # sampling from the normal distribution of mu and sigma
        self.theta = nn.Parameter(theta.mean(dim=0, keepdim=False))
        return self.theta

class CONTENT(nn.Module):
    """
    Full CONTENT model. Note: This implentation is incomplete. More optimization is needed.
    """
    def __init__(self, vocab_size, embed_size, n_hidden, n_topics, maxlen):
        super(CONTENT, self).__init__()
        
        # embed layer to reduce dimensionality
        self.l_embed = nn.Linear(vocab_size, embed_size, bias=False)
        
        # GRU
        self.l_forward0 = nn.GRU(embed_size, n_hidden, batch_first=True, bidirectional=False)

        # Recognition Net
        self.l_1 = nn.Linear(vocab_size, n_hidden)
        self.l_2 = nn.Linear(n_hidden, n_hidden)
        
        self.mu = nn.Linear(n_hidden, n_topics)
        self.log_sigma = nn.Linear(n_hidden, n_topics)
        self.l_theta = ThetaLayer(n_topics, maxlen)
        
        self.l_B = nn.Linear(vocab_size, n_topics, bias=False) 
        self.l_context = nn.Sequential(
            nn.BatchNorm1d(n_topics),
            nn.Linear(n_topics, n_topics)
        )
        self.l_dense0 = nn.Linear(n_hidden, 1)
        self.l_dense1 = nn.Flatten(start_dim=1)
        self.l_dense = nn.Sequential(
            nn.Linear(maxlen, 1),
            nn.Flatten(start_dim=1),
        )
        

    def forward(self, x, mask):
        # forward pass
        printf = False
        batch_size = x.shape[0]
        if printf: print("x", x.shape)
        
        x_emb = self.l_embed(x)
        if printf: print("x_emb",x_emb.shape)
        
        x_forward, _ = self.l_forward0(x_emb)
        if printf: print("mask",mask.shape)
        
        x_forward = x_forward * mask.unsqueeze(2)
        if printf: print("x_forward",x_forward.shape)

        x_1 = F.relu(self.l_1(x))
        x_2 = F.relu(self.l_2(x_1))
        
        # mu, sigma as per paper: batchsize * n_topic
        mu = self.mu(x_2)
        log_sigma = self.log_sigma(x_2)
        if printf: print("mu",mu.shape)
        if printf: print("log_sigma",log_sigma.shape)

        
        theta = self.l_theta(mu, log_sigma) # batchsize * maxlen * n_topic
        if printf: print("theta",theta.shape)
        klterm = self.l_theta.klterm # to be used in cost calculation
        if printf: print("klterm",klterm)
        
        b = self.l_B(x)
        if printf: print("b",b.shape)
        
        context = b * theta # elem wise multiplication
        if printf: print("context before layer",context.shape)
#         context = self.l_context(context)
#         if printf: print("context after layer",context.shape)
        context = torch.mean(context, dim=-1)
        if printf: print("context",context.shape)
        
        # combine GRU and context
        x_dense0 = self.l_dense0(x_forward)
        if printf: print("x_dense0",x_dense0.shape)
        
        x_dense1 = self.l_dense1(x_dense0)
        if printf: print("x_dense1",x_dense1.shape)
#         x_dense = self.l_dense(torch.cat((x_dense1, context), dim=1))

        if printf: print("x_dense1 + context",(x_dense1 + context).shape)
        x_dense = self.l_dense(x_dense1 + context)
        if printf: print("x_dense",x_dense.shape)

        output = torch.sigmoid(x_dense) * mask + 0.000001
        if printf: print("output",output.shape)
#         output = output.view(-1, maxlen)
        output = output.view((batch_size,-1))
        if printf: print("output",output.shape)
        
        return output, klterm

# Define model and optimizer
content_model = CONTENT(N_VOCAB, EMBED_SIZE, N_HIDDEN, N_TOPICS, MAX_LENGTH)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(content_model.parameters(),lr=LEARNING_RATE)

In [22]:
# train_iter = iter(train_loader)
# x, x_mask, y, lengths, eventLengths = next(train_iter)
# x.shape, y.shape

# model = content_model
# n_epochs = NUM_EPOCHS
# for epoch in range(n_epochs):
#     model.train()
#     train_loss = 0
#     for x, x_mask, y, lengths, eventLengths in train_loader:
#         """
#             1. zero grad
#             2. model forward
#             3. calculate loss
#             4. loss backward
#             5. optimizer step
#         """
#         outputs, klterm = model(x, x_mask)
# #             print("outputs",outputs.shape)
# #             print("y",y.shape)
#         loss = criterion(outputs, y) + klterm
#         # Backward pass
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()
#         train_loss += loss.item()
#     train_loss = train_loss / len(train_loader)
#     print('Epoch: {} \t Training Loss: {:.6f}'.format(epoch+1, train_loss))
#     p, r, f, roc_auc = eval_model_content(model, val_loader)
#     print('Epoch: {} \t Validation p: {:.2f}, r:{:.2f}, f: {:.2f}, roc_auc: {:.2f}'
#           .format(epoch+1, p, r, f, roc_auc))
#     break

In [23]:
# train_content(content_model, train_loader, val_loader, NUM_EPOCHS, criterion, optimizer)

In [24]:
# acc, p, r, f, roc_auc,pr_auc  = eval_model_content(content_model, val_loader)
# print('Validation acc: {:.2f}, p: {:.2f}, r:{:.2f}, f: {:.2f}, roc_auc: {:.2f}, pr_auc: {:.2f}'
#               .format(acc, p, r, f, roc_auc, pr_auc))

# acc, p, r, f, roc_auc,pr_auc  = eval_model_content(content_model, test_loader)
# print('Test acc: {:.2f}, p: {:.2f}, r:{:.2f}, f: {:.2f}, roc_auc: {:.2f}, pr_auc: {:.2f}'
#               .format(acc, p, r, f, roc_auc, pr_auc))


# Multiple Runs and Results

In [25]:
NUM_RUNS = 10

In [None]:
results_gru = pd.DataFrame()

for i in range(NUM_RUNS):
    print(i)
    
    # GRU
    gru_rnn = GRUModel()
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(gru_rnn.parameters(), lr=LEARNING_RATE)
    train(gru_rnn, train_loader, val_loader, NUM_EPOCHS, criterion, optimizer)
    acc, p, r, f, roc_auc,pr_auc  = eval_model(gru_rnn, test_loader)
    tmp = pd.DataFrame({'roc_auc':[roc_auc] , 'pr_auc':[pr_auc], 'accuracy':[acc], 'precision':[p], 'recall':[r], 'f1':[f]}).round(2)
    results_gru = pd.concat([results_gru,tmp])
    

0
Epoch: 1 	 Training Loss: 0.241364
Epoch: 1 	 Validation acc: 0.72, p: 0.29, r:0.16, f: 0.20, roc_auc: 0.56, pr_auc: 0.26
Epoch: 2 	 Training Loss: 0.214809
Epoch: 2 	 Validation acc: 0.73, p: 0.32, r:0.18, f: 0.23, roc_auc: 0.59, pr_auc: 0.28
Epoch: 3 	 Training Loss: 0.170089
Epoch: 3 	 Validation acc: 0.74, p: 0.37, r:0.21, f: 0.26, roc_auc: 0.64, pr_auc: 0.32
Epoch: 4 	 Training Loss: 0.158554
Epoch: 4 	 Validation acc: 0.75, p: 0.39, r:0.21, f: 0.27, roc_auc: 0.65, pr_auc: 0.33
Epoch: 5 	 Training Loss: 0.155061
Epoch: 5 	 Validation acc: 0.73, p: 0.36, r:0.23, f: 0.28, roc_auc: 0.64, pr_auc: 0.31
Epoch: 6 	 Training Loss: 0.151918
Epoch: 6 	 Validation acc: 0.75, p: 0.39, r:0.21, f: 0.27, roc_auc: 0.65, pr_auc: 0.33
1
Epoch: 1 	 Training Loss: 0.245764
Epoch: 1 	 Validation acc: 0.71, p: 0.30, r:0.20, f: 0.24, roc_auc: 0.58, pr_auc: 0.28
Epoch: 2 	 Training Loss: 0.176100
Epoch: 2 	 Validation acc: 0.73, p: 0.34, r:0.20, f: 0.25, roc_auc: 0.62, pr_auc: 0.31
Epoch: 3 	 Training 

In [None]:
display(results_gru)
display(results_gru.mean().round(2))
display(results_gru.std().round(2))
results_gru.to_csv(f"pytorch_results_gru_epoch{NUM_EPOCHS}.csv")

In [25]:
results_content = pd.DataFrame()

for i in range(NUM_RUNS):
    print(i)    
    
    # CONTENT
    content_model = CONTENT(N_VOCAB, EMBED_SIZE, N_HIDDEN, N_TOPICS, MAX_LENGTH)
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(content_model.parameters(),lr=LEARNING_RATE)
    train_content(content_model, train_loader, val_loader, NUM_EPOCHS, criterion, optimizer)
    acc, p, r, f, roc_auc,pr_auc  = eval_model_content(content_model, test_loader)
    tmp = pd.DataFrame({'roc_auc':[roc_auc] , 'pr_auc':[pr_auc], 'accuracy':[acc], 'precision':[p], 'recall':[r], 'f1':[f]}).round(2)
    results_content = pd.concat([results_content,tmp])
    

0
Epoch: 1 	 Training Loss: 10.392436
Epoch: 1 	 Validation acc: 0.78, p: 0.54, r:0.11, f: 0.18, roc_auc: 0.68, pr_auc: 0.39
Epoch: 2 	 Training Loss: 10.287996
Epoch: 2 	 Validation acc: 0.78, p: 0.62, r:0.07, f: 0.12, roc_auc: 0.68, pr_auc: 0.39
Epoch: 3 	 Training Loss: 10.286212
Epoch: 3 	 Validation acc: 0.78, p: 0.59, r:0.08, f: 0.14, roc_auc: 0.67, pr_auc: 0.39
Epoch: 4 	 Training Loss: 10.285068
Epoch: 4 	 Validation acc: 0.78, p: 0.59, r:0.08, f: 0.15, roc_auc: 0.66, pr_auc: 0.37
Epoch: 5 	 Training Loss: 10.284655
Epoch: 5 	 Validation acc: 0.77, p: 0.49, r:0.17, f: 0.25, roc_auc: 0.67, pr_auc: 0.38
Epoch: 6 	 Training Loss: 10.284005
Epoch: 6 	 Validation acc: 0.78, p: 0.63, r:0.04, f: 0.07, roc_auc: 0.66, pr_auc: 0.37
1
Epoch: 1 	 Training Loss: 10.380548
Epoch: 1 	 Validation acc: 0.78, p: 0.52, r:0.15, f: 0.23, roc_auc: 0.68, pr_auc: 0.39
Epoch: 2 	 Training Loss: 10.287967
Epoch: 2 	 Validation acc: 0.78, p: 0.54, r:0.09, f: 0.15, roc_auc: 0.68, pr_auc: 0.39
Epoch: 3 	 T

In [33]:
display(results_content)
display(results_content.mean().round(2))
display(results_content.std().round(2))
results_content.to_csv(f"pytorch_results_content_epoch{NUM_EPOCHS}.csv")

Unnamed: 0,roc_auc,pr_auc,accuracy,precision,recall,f1
0,0.64,0.33,0.77,0.46,0.07,0.11
0,0.67,0.37,0.77,0.47,0.15,0.23
0,0.66,0.37,0.78,0.52,0.13,0.21
0,0.67,0.37,0.77,0.45,0.19,0.26
0,0.66,0.37,0.77,0.46,0.18,0.26
0,0.65,0.32,0.76,0.3,0.04,0.07
0,0.66,0.37,0.78,0.53,0.12,0.19
0,0.67,0.36,0.77,0.47,0.2,0.28
0,0.65,0.34,0.76,0.42,0.15,0.22
0,0.67,0.38,0.77,0.47,0.21,0.29


roc_auc      0.66
pr_auc       0.36
accuracy     0.77
precision    0.45
recall       0.14
f1           0.21
dtype: float64

roc_auc      0.01
pr_auc       0.02
accuracy     0.01
precision    0.06
recall       0.06
f1           0.07
dtype: float64

In [None]:
"""
Run on a Thinkpad machine
with 16GB RAM,
Processor 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz, 2803 Mhz, 4 Core(s), 8 Logical Processor(s)
Windows 11 OS.
Device GPU was not used for training.
"""