In [None]:
# Colab-only: mount Google Drive at /content/drive so the dataset files
# referenced later (under /content/drive/MyDrive/...) can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
from math import log

In [None]:
from keras import Input
from keras.layers import Dense
from tensorflow.keras import Sequential
from keras.layers.embeddings import Embedding
from keras.layers import LSTM
from sklearn.utils import class_weight
from keras.layers import Flatten
from keras.initializers import Constant
from keras.models import model_from_json
import tensorflow as tf
from keras.layers import Bidirectional
from keras.layers import Dropout
from keras.layers import Conv1D
from keras.layers import MaxPooling1D
import torch
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader 
import torch.nn.functional as F
from keras.preprocessing import sequence

In [None]:
# Pick the compute device once: the GPU when PyTorch can see one, else the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'
device

'cuda'

## Preprocessing of Data

In [None]:
def get_data(file_name , phase = 'train' , type = 'event'): # phase -(train ,test) , type - (event , time)
  """Load sequences from `file_name` and split each into (prefix, last value).

  The file is read with pandas (no header); column 0 of every row is expected
  to hold one space-separated sequence. The piece after the final space is
  dropped before parsing (presumably each line ends with a trailing space --
  TODO confirm against the data files).

  Args:
    file_name: path handed to `pd.read_csv`.
    phase: only echoed in the summary print; 'train' and 'test' rows are
      processed identically.
    type: 'time' parses values as float and maps them to log10(value + 1);
      anything else parses values as int.

  Returns:
    (X, y): X is a list of sequence prefixes (all but the last value) and y
    is the list of matching last values. Rows with fewer than two values are
    skipped.
  """
  frame = pd.read_csv(file_name, header=None)
  parse_as_time = (type == 'time')

  X, y = [], []
  for raw_row in frame[0]:
    tokens = raw_row.split(' ')[:-1]

    if parse_as_time:
      values = [log(float(tok) + 1, 10) for tok in tokens]
    else:
      values = [int(tok) for tok in tokens]

    # A prefix plus a target needs at least two values.
    if len(values) <= 1:
      continue

    X.append(values[:-1])
    y.append(values[-1])

  print(phase , type , len(X) ,len(y))
  return X , y


In [None]:
# Token list "0".."22"; the models below only use its length.
vocab = list(map(str, range(23)))

## DataLoader

In [None]:
class MimicDataset(Dataset):
    """Dataset yielding (events, next_event, times, next_time) tensors.

    Args:
        E: numpy array of event-index sequences; `E[idx]` must be a numpy
            array (it is converted with `torch.from_numpy`).
        Y_e: array of next-event targets; its length defines the dataset size.
        T: array of per-step time features aligned with `E`, or None.
        Y_t: array of next-time targets, or None.

    Each item is a 4-tuple. When `T` (or `Y_t`) is None the corresponding
    slot is filled with the placeholder `torch.tensor([0])` so that batches
    always have four fields.
    """

    def __init__(self, E, Y_e, T, Y_t):
        self.E = E
        self.Y_e = Y_e
        self.T = T
        self.Y_t = Y_t

    def __len__(self):
        return len(self.Y_e)

    def __getitem__(self, idx):
        # Cast indices to int64: arrays produced by Keras `pad_sequences`
        # default to int32, and int64 is the index dtype accepted by every
        # PyTorch version of nn.Embedding.
        events = torch.from_numpy(self.E[idx]).long()
        next_event = torch.tensor(self.Y_e[idx])

        if self.T is None:
            return events, next_event, torch.tensor([0]), torch.tensor([0])

        times = torch.tensor(self.T[idx]).float()
        if self.Y_t is None:
            return events, next_event, times, torch.tensor([0])

        return events, next_event, times, torch.tensor(self.Y_t[idx])

In [None]:
def get_data_loader(X,Y_e,T=None,Y_t=None,batch_size=32):
  """Wrap the given sequences in a MimicDataset and return a DataLoader.

  Args:
    X: event sequences (converted with `np.array`).
    Y_e: next-event targets (converted with `np.array`).
    T: optional time sequences; when None, `Y_t` is ignored as well.
    Y_t: optional next-time targets, only used when `T` is given.
    batch_size: batch size passed to the DataLoader.

  Returns:
    A `DataLoader` over the dataset, built with default options (no shuffle).
  """
  events = np.array(X)
  event_targets = np.array(Y_e)

  # Time targets only make sense alongside time inputs.
  if T is None:
    times, time_targets = None, None
  else:
    times = np.array(T)
    time_targets = None if Y_t is None else np.array(Y_t)

  dataset = MimicDataset(events, event_targets, times, time_targets)
  return DataLoader(dataset, batch_size)


In [None]:
def Data_Loader(files): #[train_event , test_event , train_time , test_time]
  """Build train/test DataLoaders from four sequence files.

  Args:
    files: list of paths in the order
      [train_event, test_event, train_time, test_time].

  Returns:
    (train_loader, test_loader). All sequences are left-padded ("pre") with
    zeros to the length of the longest *training* event prefix; with the
    `pad_sequences` defaults, longer sequences are truncated to that length.
  """
  X_event_train , y_event_train = get_data(files[0] , 'train' , 'event')
  X_event_test , y_event_test = get_data(files[1] , 'test' ,'event')

  X_time_train , y_time_train = get_data(files[2] , 'train' ,'time')
  X_time_test , y_time_test = get_data(files[3] , 'test', 'time')

  max_len = max(len(seq) for seq in X_event_train)

  def _pad(seqs, dtype):
    return sequence.pad_sequences(seqs, maxlen=max_len, padding="pre", dtype=dtype)

  # Event ids are integers; time features are log-scaled floats and must be
  # padded as float32 -- the pad_sequences default (int32) would truncate
  # them to whole numbers.
  X_event_train = _pad(X_event_train, 'int32')
  X_time_train = _pad(X_time_train, 'float32')
  X_event_test = _pad(X_event_test, 'int32')
  X_time_test = _pad(X_time_test, 'float32')

  train_loader = get_data_loader(X_event_train , y_event_train , X_time_train , y_time_train)
  test_loader = get_data_loader(X_event_test , y_event_test , X_time_test , y_time_test)

  return (train_loader , test_loader)


## Attention Block

In [None]:
class SelfAttention(nn.Module):
    """Multi-head dot-product attention.

    The last dimension of each input is reshaped into `heads` chunks of size
    `head_dim = embed_size // heads`. The same bias-free
    `head_dim -> head_dim` linear map is applied to every head for values,
    keys and queries respectively, and the concatenated heads are passed
    through `fc_out`.

    Note: scores are divided by sqrt(embed_size), not sqrt(head_dim).
    """

    def __init__(self, embed_size, heads):
        super().__init__()
        self.embed_size = embed_size
        self.heads = heads
        self.head_dim = embed_size // heads

        assert (
            heads * self.head_dim == embed_size
        ), "Embedding size needs to be divisible by heads"

        per_head = self.head_dim
        self.values = nn.Linear(per_head, per_head, bias=False)
        self.keys = nn.Linear(per_head, per_head, bias=False)
        self.queries = nn.Linear(per_head, per_head, bias=False)
        self.fc_out = nn.Linear(heads * per_head, embed_size)

    def forward(self, values, keys, query, mask):
        """Attend from `query` over `keys`/`values`.

        Args:
            values, keys, query: tensors reshaped here to
                (N, length, heads, head_dim); N is taken from `query`.
            mask: optional tensor broadcastable to
                (N, heads, query_len, key_len); positions where it equals 0
                receive a very large negative score before the softmax.

        Returns:
            Tensor of shape (N, query_len, embed_size).
        """
        batch = query.shape[0]
        n_heads, d_head = self.heads, self.head_dim

        def split_heads(tensor):
            # (N, length, heads * head_dim) -> (N, length, heads, head_dim)
            return tensor.reshape(batch, tensor.shape[1], n_heads, d_head)

        v = self.values(split_heads(values))
        k = self.keys(split_heads(keys))
        q = self.queries(split_heads(query))

        # Per batch element and head: dot product of every query position
        # with every key position -> (N, heads, query_len, key_len).
        scores = torch.einsum("nqhd,nkhd->nhqk", [q, k])

        if mask is not None:
            scores = scores.masked_fill(mask == 0, float("-1e20"))

        # Softmax over the key axis so weights for each query sum to 1.
        weights = torch.softmax(scores / (self.embed_size ** (1 / 2)), dim=3)

        # Weighted sum of values -> (N, query_len, heads, head_dim), then
        # merge the head and head_dim axes.
        context = torch.einsum("nhql,nlhd->nqhd", [weights, v])
        merged = context.reshape(batch, query.shape[1], n_heads * d_head)

        return self.fc_out(merged)

class TransformerBlock(nn.Module):
    """Attention + feed-forward block with residual connections.

    Layout: attention -> add query -> LayerNorm -> dropout -> feed-forward
    -> add -> LayerNorm -> dropout. The feed-forward net is
    Linear(embed_size, forward_expansion * embed_size) -> ReLU ->
    Linear(forward_expansion * embed_size, embed_size).
    """

    def __init__(self, embed_size, heads, dropout, forward_expansion):
        super().__init__()
        self.attention = SelfAttention(embed_size, heads)
        self.norm1 = nn.LayerNorm(embed_size)
        self.norm2 = nn.LayerNorm(embed_size)

        expanded = forward_expansion * embed_size
        self.feed_forward = nn.Sequential(
            nn.Linear(embed_size, expanded),
            nn.ReLU(),
            nn.Linear(expanded, embed_size),
        )

        self.dropout = nn.Dropout(dropout)

    def forward(self, value, key, query, mask=None):
        """Return the block output, same trailing size as `query`."""
        attended = self.attention(value, key, query, mask)

        # First residual: the skip connection is taken from the query input.
        normed_attention = self.norm1(attended + query)
        x = self.dropout(normed_attention)

        # Second residual around the position-wise feed-forward net.
        normed_ff = self.norm2(self.feed_forward(x) + x)
        return self.dropout(normed_ff)

### Model

In [None]:
class Model_Attention(torch.nn.Module):
  """Event/time model that fuses the two streams with attention before an LSTM.

  Time features are turned into a 3-way softmax and projected to the
  embedding size. Event and time embeddings each pass through their own
  self-attention TransformerBlock; a third block then uses the attended
  events as query and the attended times as key/value. The result is added
  to the raw event embedding, layer-normalised and fed to an LSTM whose last
  hidden state drives two heads.

  Args:
    vocab: sequence whose length sets the embedding table size (index 0 is
      the padding index).
    embedding_dim: size of event and time embeddings.
    hidden_dim: LSTM hidden size.
    num_classes: width of the event-logit head. Defaults to 204, the value
      that was previously hard-coded; other models in this file use
      len(vocab) -- NOTE(review): confirm which width is intended.
  """
  def __init__(self,vocab,embedding_dim,hidden_dim,num_classes=204):
    super().__init__()
    self.embedding=nn.Embedding(len(vocab),embedding_dim,padding_idx=0)
    self.lstm=nn.LSTM(embedding_dim,hidden_dim,batch_first=True)
    self.linear=nn.Linear(hidden_dim,num_classes)
    self.linear_emb=nn.Linear(3,embedding_dim)
    self.linear_proj=nn.Linear(1,3)
    self.linear_duration_proj=nn.Linear(hidden_dim,1)
    # linear_cat and relu are not used in forward(); kept so existing
    # checkpoints and parameter initialisation order stay unchanged.
    self.linear_cat=nn.Linear(2*embedding_dim,embedding_dim)
    self.relu=nn.ReLU()
    self.softmax=nn.Softmax(dim=2)

    self.norm_event= nn.LayerNorm(embedding_dim)
    self.transformer_block_time= TransformerBlock(embed_size=embedding_dim, heads=1, dropout=0.5, forward_expansion=1)
    self.transformer_block_event= TransformerBlock(embed_size=embedding_dim, heads=1, dropout=0.5, forward_expansion=1)
    self.transformer_block_cross= TransformerBlock(embed_size=embedding_dim, heads=1, dropout=0.5, forward_expansion=1)

  def forward(self,event,event_time):
    """Return (event logits, duration prediction) for a batch.

    Args:
      event: integer index tensor, presumably (batch, seq_len).
      event_time: float tensor aligned with `event`; a trailing feature
        axis of size 1 is added here.
    """
    event_time = event_time.unsqueeze(-1)
    event_embedding=self.embedding(event)

    # Soft 3-way assignment of each time value, projected to embedding_dim.
    soft_embedding=self.softmax(self.linear_proj(event_time))
    time_embedding=self.linear_emb(soft_embedding)

    # Self-attention within each stream.
    time_attention=self.transformer_block_time(time_embedding,time_embedding,time_embedding)
    event_attention=self.transformer_block_event(event_embedding,event_embedding,event_embedding)

    # Cross attention: events query the time stream (value=key=time).
    input_lstm=self.transformer_block_cross(time_attention,time_attention,event_attention)

    # Residual back to the raw event embedding.
    input_lstm=self.norm_event(input_lstm+event_embedding)

    lstm_out,(ht,ct)=self.lstm(input_lstm)

    output1=self.linear(ht[-1])
    output2=self.linear_duration_proj(ht[-1])
    return (output1,output2)

In [None]:
class Model_TimeContext(torch.nn.Module):
  """Next-event LSTM whose event embeddings are gated by a time-derived mask.

  Each time value goes through Linear(1,1) -> ReLU -> Linear(1,embedding_dim)
  -> sigmoid, and the resulting gate multiplies the event embedding
  element-wise before the LSTM. The last hidden state is mapped to
  len(vocab) logits.
  """
  def __init__(self,vocab,embedding_dim,hidden_dim):
    super().__init__()
    n_tokens = len(vocab)
    self.embedding = nn.Embedding(n_tokens, embedding_dim, padding_idx=0)
    self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
    self.linear = nn.Linear(hidden_dim, n_tokens)
    self.linear_ctx = nn.Linear(1, 1)
    self.linear_mask = nn.Linear(1, embedding_dim)
    self.relu = nn.ReLU()

  def forward(self,event,event_time):
    """Return event logits of shape (batch, len(vocab)).

    Args:
      event: integer index tensor, presumably (batch, seq_len).
      event_time: float tensor aligned with `event`.
    """
    time_feature = event_time.unsqueeze(-1)
    context = self.relu(self.linear_ctx(time_feature))
    gate = torch.sigmoid(self.linear_mask(context))
    gated_events = torch.mul(self.embedding(event), gate)
    _, (final_hidden, _) = self.lstm(gated_events)
    return self.linear(final_hidden[-1])

In [None]:
class Model_TimeConcat(torch.nn.Module):
  """Next-event LSTM fed with the event embedding concatenated with the time value.

  The LSTM input size is therefore embedding_dim + 1; the last hidden state
  is mapped to len(vocab) logits.
  """
  def __init__(self,vocab,embedding_dim,hidden_dim):
    super().__init__()
    n_tokens = len(vocab)
    self.embedding = nn.Embedding(n_tokens, embedding_dim, padding_idx=0)
    self.lstm = nn.LSTM(embedding_dim + 1, hidden_dim, batch_first=True)
    self.linear = nn.Linear(hidden_dim, n_tokens)

  def forward(self,event,event_time):
    """Return event logits of shape (batch, len(vocab)).

    Args:
      event: integer index tensor, presumably (batch, seq_len).
      event_time: float tensor aligned with `event`; appended as one extra
        feature per step.
    """
    embedded = self.embedding(event)
    time_feature = event_time.unsqueeze(-1)
    # Concatenate along the feature axis (dim 2).
    lstm_input = torch.cat((embedded, time_feature), 2)
    _, (final_hidden, _) = self.lstm(lstm_input)
    return self.linear(final_hidden[-1])


In [None]:
class Model_NoTime(torch.nn.Module):
  """Baseline next-event LSTM that ignores time: embedding -> LSTM -> linear."""

  def __init__(self,vocab,embedding_dim,hidden_dim):
    super().__init__()
    n_tokens = len(vocab)
    self.embedding_event = nn.Embedding(n_tokens, embedding_dim, padding_idx=0)
    self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
    self.linear = nn.Linear(hidden_dim, n_tokens)
    # linear1 is created but not used by forward().
    self.linear1 = nn.Linear(embedding_dim, 7)

  def forward(self,x):
    """Return event logits of shape (batch, len(vocab)) for index tensor `x`."""
    embedded = self.embedding_event(x)
    _, (final_hidden, _) = self.lstm(embedded)
    return self.linear(final_hidden[-1])



In [None]:
class Model_TimeJoint(torch.nn.Module):
  """Next-event LSTM fed with the average of event and time embeddings.

  The time embedding is built by projecting each time value to 3 scores,
  taking a softmax over them and projecting the result to embedding_dim.
  """
  def __init__(self,vocab,embedding_dim,hidden_dim):
    super().__init__()
    n_tokens = len(vocab)
    self.embedding = nn.Embedding(n_tokens, embedding_dim, padding_idx=0)
    self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
    self.linear = nn.Linear(hidden_dim, n_tokens)
    self.linear_emb = nn.Linear(3, embedding_dim)
    self.linear_proj = nn.Linear(1, 3)
    self.relu = nn.ReLU()
    self.softmax = nn.Softmax(dim=2)

  def forward(self,event,event_time):
    """Return event logits of shape (batch, len(vocab)).

    Args:
      event: integer index tensor, presumably (batch, seq_len).
      event_time: float tensor aligned with `event`.
    """
    time_feature = event_time.unsqueeze(-1)
    event_vectors = self.embedding(event)

    # Soft assignment over 3 slots, then a learned embedding of that mix.
    slot_weights = self.softmax(self.linear_proj(time_feature))
    time_vectors = self.linear_emb(slot_weights)

    joint = (event_vectors + time_vectors) / 2
    _, (final_hidden, _) = self.lstm(joint)
    return self.linear(final_hidden[-1])

In [None]:
class Model_TimeJointRegularized(torch.nn.Module):
  """Averaged event/time embedding LSTM with an extra duration head.

  The input pipeline averages the event embedding with a softmax-based time
  embedding; the last LSTM hidden state feeds both an event-logit head and a
  scalar duration head.
  """
  def __init__(self,vocab,embedding_dim,hidden_dim):
    super().__init__()
    n_tokens = len(vocab)
    self.embedding = nn.Embedding(n_tokens, embedding_dim, padding_idx=0)
    self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
    self.linear = nn.Linear(hidden_dim, n_tokens)
    self.linear_emb = nn.Linear(3, embedding_dim)
    self.linear_proj = nn.Linear(1, 3)
    self.linear_duration_proj = nn.Linear(hidden_dim, 1)
    self.relu = nn.ReLU()
    self.softmax = nn.Softmax(dim=2)

  def forward(self,event,event_time):
    """Return (event logits, duration prediction).

    Args:
      event: integer index tensor, presumably (batch, seq_len).
      event_time: float tensor aligned with `event`.

    Returns:
      Tuple of tensors with shapes (batch, len(vocab)) and (batch, 1).
    """
    time_feature = event_time.unsqueeze(-1)
    event_vectors = self.embedding(event)

    slot_weights = self.softmax(self.linear_proj(time_feature))
    time_vectors = self.linear_emb(slot_weights)

    joint = (event_vectors + time_vectors) / 2
    _, (final_hidden, _) = self.lstm(joint)

    last_state = final_hidden[-1]
    event_logits = self.linear(last_state)
    duration = self.linear_duration_proj(last_state)
    return (event_logits, duration)

In [None]:
class Model_TimeContextRegularized(torch.nn.Module):
  """Time-gated event LSTM with an extra duration head.

  Event embeddings are multiplied by a sigmoid gate derived from the time
  value; the last LSTM hidden state feeds an event-logit head and a scalar
  duration head. `linear_proj` is not used in forward(); the training loop
  in this notebook reads it for the 'cross_entropy' duration loss.
  """
  def __init__(self,vocab,embedding_dim,hidden_dim):
    super().__init__()
    n_tokens = len(vocab)
    self.embedding = nn.Embedding(n_tokens, embedding_dim, padding_idx=0)
    self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
    self.linear = nn.Linear(hidden_dim, n_tokens)
    self.linear_ctx = nn.Linear(1, 1)
    self.linear_proj = nn.Linear(1, 3)
    self.linear_mask = nn.Linear(1, embedding_dim)
    self.linear_duration_proj = nn.Linear(hidden_dim, 1)
    self.relu = nn.ReLU()

  def forward(self,event,event_time):
    """Return (event logits, duration prediction).

    Args:
      event: integer index tensor, presumably (batch, seq_len).
      event_time: float tensor aligned with `event`.

    Returns:
      Tuple of tensors with shapes (batch, len(vocab)) and (batch, 1).
    """
    time_feature = event_time.unsqueeze(-1)
    context = self.relu(self.linear_ctx(time_feature))
    gate = torch.sigmoid(self.linear_mask(context))
    gated_events = torch.mul(self.embedding(event), gate)

    _, (final_hidden, _) = self.lstm(gated_events)

    last_state = final_hidden[-1]
    event_logits = self.linear(last_state)
    duration = self.linear_duration_proj(last_state)
    return (event_logits, duration)

In [None]:
class Model_FeedForward(torch.nn.Module):
  """Event/time LSTM that fuses the two embeddings with a linear layer.

  The event embedding and a softmax-based time embedding are concatenated
  along the feature axis, mapped back to embedding_dim by `linear_cat`, and
  fed to an LSTM. The last hidden state drives an event-logit head and a
  scalar duration head.

  Args:
    vocab: sequence whose length sets the embedding table size (index 0 is
      the padding index).
    embedding_dim: size of event and time embeddings.
    hidden_dim: LSTM hidden size.
    num_classes: width of the event-logit head. Defaults to 204, the value
      that was previously hard-coded; other models in this file use
      len(vocab) -- NOTE(review): confirm which width is intended.
  """
  def __init__(self,vocab,embedding_dim,hidden_dim,num_classes=204):
    super().__init__()
    self.embedding=nn.Embedding(len(vocab),embedding_dim,padding_idx=0)
    self.lstm=nn.LSTM(embedding_dim,hidden_dim,batch_first=True)
    self.linear=nn.Linear(hidden_dim,num_classes)
    self.linear_emb=nn.Linear(3,embedding_dim)
    self.linear_proj=nn.Linear(1,3)
    self.linear_duration_proj=nn.Linear(hidden_dim,1)
    self.linear_cat=nn.Linear(2*embedding_dim,embedding_dim)
    self.relu=nn.ReLU()
    self.softmax=nn.Softmax(dim=2)

  def forward(self,event,event_time):
    """Return (event logits, duration prediction).

    Args:
      event: integer index tensor, presumably (batch, seq_len).
      event_time: float tensor aligned with `event`.

    Returns:
      Tuple of tensors with shapes (batch, num_classes) and (batch, 1).
    """
    event_time = event_time.unsqueeze(-1)
    event_embedding=self.embedding(event)

    # Soft 3-way assignment of each time value, projected to embedding_dim.
    soft_embedding=self.softmax(self.linear_proj(event_time))
    time_embedding=self.linear_emb(soft_embedding)

    # Concatenate on the feature axis and project back to embedding_dim.
    input_lstm=torch.cat((event_embedding,time_embedding),2)
    input_lstm=self.linear_cat(input_lstm)

    lstm_out,(ht,ct)=self.lstm(input_lstm)

    output1=self.linear(ht[-1])
    output2=self.linear_duration_proj(ht[-1])
    return (output1,output2)

In [None]:
def get_model(type, vocab, embedding_dim, hidden_dim):
  """Instantiate the model class registered under the name `type`.

  Args:
    type: one of 'NoTime', 'TimeConcat', 'TimeContext', 'TimeJoint',
      'TimeJointRegularized', 'TimeContextRegularized', 'FeedForward',
      'Attention'.
    vocab, embedding_dim, hidden_dim: forwarded to the model constructor.

  Returns:
    A freshly constructed model.

  Raises:
    ValueError: if `type` is not a known model name (previously this fell
      through and failed with an unbound local variable).
  """
  if type == 'NoTime':
    model_cls = Model_NoTime
  elif type == 'TimeConcat':
    model_cls = Model_TimeConcat
  elif type == 'TimeContext':
    model_cls = Model_TimeContext
  elif type == 'TimeJoint':
    model_cls = Model_TimeJoint
  elif type == 'TimeJointRegularized':
    model_cls = Model_TimeJointRegularized
  elif type == 'TimeContextRegularized':
    model_cls = Model_TimeContextRegularized
  elif type == 'FeedForward':
    model_cls = Model_FeedForward
  elif type == 'Attention':
    model_cls = Model_Attention
  else:
    raise ValueError("Unknown model type: {!r}".format(type))

  return model_cls(vocab, embedding_dim, hidden_dim)

## Training

In [None]:
def cross_entropy_duration(pred, soft_targets):
    """Cross-entropy between logits `pred` and a soft target distribution.

    A log-softmax is taken over dim 1 of `pred`; the per-row loss is
    -sum(soft_targets * log_softmax(pred)) and the mean over rows is returned.
    """
    log_probs = nn.LogSoftmax(dim=1)(pred)
    per_row = torch.sum(-soft_targets * log_probs, 1)
    return per_row.mean()

In [None]:
# Placeholder scale for the duration loss (still to be computed from the
# training data). NOTE(review): it is squared below, so despite the name it
# behaves like a standard deviation -- confirm the intended quantity.
variance_train_data = torch.tensor(1.0, requires_grad=True) # need to be calculated...

def duration_regularization(pred , target):
  """Sum over the batch of (pred - target)^2 / (2 * variance_train_data^2).

  `target` gets a trailing axis added so it lines up with `pred`
  (presumably (batch, 1) -- TODO confirm).
  """
  squared_error = (pred - target.unsqueeze(-1)) ** 2
  scale = 2 * (variance_train_data ** 2)
  return torch.div(squared_error, scale).sum()

In [None]:
def _forward_with_loss(model, batch, model_type, regularization):
  """Run one batch through `model`; return (event logits, event targets, loss).

  The loss is the event cross-entropy plus, when requested, a duration term
  ('cross_entropy' or 'nll'). Tensors are moved to the global `device`.
  """
  e, y_e, t, y_t = (tensor.to(device) for tensor in batch)

  y_t_pred = None
  if model_type == 'NoTime':
    y_e_pred = model(e)
  elif model_type in ('TimeContextRegularized', 'TimeJointRegularized', 'FeedForward', 'Attention'):
    y_e_pred, y_t_pred = model(e, t)
  else:
    y_e_pred = model(e, t)

  if regularization in ('cross_entropy', 'nll') and y_t_pred is None:
    raise ValueError(
        "regularization={!r} needs a model with a duration head, got model_type={!r}".format(
            regularization, model_type))

  loss = F.cross_entropy(y_e_pred, y_e)

  if regularization == 'cross_entropy':
    duration_embedding_pred = model.linear_proj(y_t_pred)

    # The target distribution is treated as a constant. `.float()` keeps the
    # tensor on its current device, so this also works without CUDA.
    with torch.no_grad():
      duration_embedding_true = model.linear_proj(y_t.unsqueeze(-1).float())
      soft_duration_embedding_true = torch.softmax(duration_embedding_true, dim=1)

    loss = loss + cross_entropy_duration(duration_embedding_pred, soft_duration_embedding_true)

  elif regularization == 'nll':
    loss = loss + duration_regularization(y_t_pred, y_t)

  return y_e_pred, y_e, loss


def train_model(model,train_data_loader,test_data_loader , model_type,regularization = None , epochs=2,lr=0.001):
  """Train `model` with Adam, then evaluate it once on the test loader.

  Args:
    model: the network to train; moved to the global `device`.
    train_data_loader, test_data_loader: loaders yielding
      (events, next_event, times, next_time) batches.
    model_type: model name; decides how the model is called ('NoTime' takes
      only events, the regularized/FeedForward/Attention models return an
      extra duration prediction).
    regularization: None, 'cross_entropy' or 'nll' -- optional duration loss
      added to the event cross-entropy.
    epochs: number of training epochs.
    lr: Adam learning rate.

  Returns:
    (train_accuracy, train_loss, test_accuracy, test_loss) as Python floats.
    The train metrics are averaged over all epochs; the test metrics come
    from a single pass after training.

  Raises:
    ValueError: if a duration regularization is requested for a model type
      that produces no duration prediction.
  """
  model.to(device)
  parameters = filter(lambda p: p.requires_grad, model.parameters())
  optimizer = torch.optim.Adam(parameters, lr=lr)

  train_accuracy, train_loss = 0.0, 0.0

  for _ in range(epochs):
    model.train()
    correct, total, sum_loss = 0, 0, 0.0

    for batch in train_data_loader:
      y_e_pred, y_e, loss = _forward_with_loss(model, batch, model_type, regularization)

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      batch_size = y_e.shape[0]
      # .item() keeps the running counters as plain Python numbers instead
      # of tensors living on the training device.
      correct += (torch.max(y_e_pred, 1)[1] == y_e).sum().item()
      total += batch_size
      sum_loss += loss.item() * batch_size

    train_accuracy += correct / total
    train_loss += sum_loss / total

  model.eval()
  with torch.no_grad():
    correct, total, sum_loss = 0, 0, 0.0

    for batch in test_data_loader:
      y_e_pred, y_e, loss = _forward_with_loss(model, batch, model_type, regularization)

      batch_size = y_e.shape[0]
      correct += (torch.max(y_e_pred, 1)[1] == y_e).sum().item()
      total += batch_size
      sum_loss += loss.item() * batch_size

  test_accuracy = correct / total
  test_loss = sum_loss / total

  return (train_accuracy/epochs , train_loss/epochs , test_accuracy , test_loss)



In [None]:
# Experiment driver: for every model type, train on each of the 5 data
# splits and report the metrics of the split with the best test accuracy.
models = ['NoTime','TimeConcat','TimeJoint','TimeContext','TimeJointRegularized','TimeContextRegularized','FeedForward','Attention']
regularization = ['cross_entropy' , 'nll']

for model_type in models:

  # Only the models with a duration head are trained with the duration loss.
  regul = None
  if model_type in ('TimeJointRegularized', 'TimeContextRegularized', 'FeedForward', 'Attention'):
    regul = 'cross_entropy'

  total_train_acc , total_train_loss , total_test_acc , total_test_loss = [] ,[],[] ,[]

  for i in range(1,6):

    root_path = '/content/drive/MyDrive/so/'
    files = [
        root_path + 'event-' + str(i) + '-train.txt',
        root_path + 'event-' + str(i) + '-test.txt',
        root_path + 'time-' + str(i) + '-train.txt',
        root_path + 'time-' + str(i) + '-test.txt',
    ]

    model  = get_model(type = model_type , vocab = vocab , embedding_dim= 5 , hidden_dim = 10)
    train_loader , test_loader = Data_Loader(files)
    train_acc , train_loss , test_acc , test_loss = train_model(model = model, train_data_loader = train_loader, test_data_loader = test_loader , model_type = model_type, regularization=regul, epochs=100, lr=0.001)

    # float() accepts both Python numbers and 0-d tensors (including CUDA
    # ones), so np.argmax below never has to convert device tensors.
    total_train_acc.append(float(train_acc))
    total_train_loss.append(float(train_loss))
    total_test_acc.append(float(test_acc))
    total_test_loss.append(float(test_loss))

  best_index = np.argmax(total_test_acc)
  print('Model : {} ,Regularization : {} '.format(model_type , regul))
  print('Train Accuracy : {}'.format(total_train_acc[best_index]))
  print('Train Loss : {}'.format(total_train_loss[best_index]))
  print('Test Accuracy : {}'.format(total_test_acc[best_index]))
  print('Test Loss : {}'.format(total_test_loss[best_index]))
  print('*******************************************************************************************')
  print('*******************************************************************************************')

train event 5307 5307
test event 1326 1326
train time 5307 5307
test time 1326 1326


RuntimeError: ignored