In [None]:
import os
from os.path import dirname
root_path = dirname(dirname(os.getcwd()))
print(root_path)
import sys
sys.path.append(root_path + '/RemainingCycleTimePrediction/2_Scripts/')
import pandas as pd
import numpy as np
import copy
import datetime, time
import multiprocessing
import multiprocessing.dummy
from sklearn import metrics

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from ax.plot.contour import plot_contour
from ax.plot.trace import optimization_trace_single_method
from ax.service.managed_loop import optimize
from ax.utils.notebook.plotting import render

from Event_log_processing_utils import Extract_trace_and_temporal_features, Extract_prefix
from sklearn.preprocessing import OneHotEncoder
from sklearn import preprocessing
import warnings
warnings.filterwarnings("ignore")

# Project layout: data and outputs live under <root_path>/RemainingCycleTimePrediction/.
project_dir = root_path + '/RemainingCycleTimePrediction/'
data_dir = root_path + '/RemainingCycleTimePrediction/1_Data/'

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

## 1. Load data

In [None]:
# Name of the event log to process; it is used to build the processed-CSV file names
# and the output file names. Keep exactly one assignment active.
# NOTE: the end-activity cell only knows an end activity for 'BPIC20' and 'Helpdesk'.
data_name = 'BPIC20'
# data_name = 'Helpdesk'
# data_name = 'EMS3141BE'

In [None]:
# Activity label that marks the end of a case: the recursive prediction loop stops
# as soon as the model predicts this activity.
if data_name == 'BPIC20':
    end_act = "Payment Handled"
elif data_name == 'Helpdesk':
    end_act = "Closed"
else:
    # Without this branch `end_act` would stay undefined and the notebook would only
    # fail much later (NameError inside the prediction threads, after tuning/training).
    raise ValueError(
        "No end activity configured for data_name={!r}; "
        "add it to this cell before running the notebook.".format(data_name))

In [None]:
# Load the full processed log and its train / validation / test splits from data_dir.
tab_all, tab_train, tab_valid, tab_test = [
    pd.read_csv(data_dir + data_name + suffix)
    for suffix in ("_processed_all.csv", "_processed_train.csv",
                   "_processed_valid.csv", "_processed_test.csv")
]

## 2. Prepare inputs and outputs for model training

In [None]:
def Prepare_X_Y_next_activity(tab, list_activities, divisor, divisor2, encoder, le, maxlen):
    """Vectorize every prefix of an event log for next-activity / next-time training.

    Parameters
    ----------
    tab : event-log table, passed unchanged to ``Extract_trace_and_temporal_features``.
    list_activities : list of activity labels; its length fixes the one-hot width.
    divisor : normaliser for the first temporal feature and for the time target.
    divisor2 : normaliser for the second temporal feature.
    encoder : fitted one-hot encoder; ``transform`` must return an object with ``toarray()``.
    le : fitted label encoder mapping activity labels to integer class ids.
    maxlen : number of time steps per sample; shorter prefixes are left-padded with zeros.

    Returns
    -------
    X : float32 array of shape (num_samples, maxlen, len(list_activities) + 5).
        Per event: one-hot activity, 1-based position in the prefix, and the four
        temporal features scaled by ``divisor``, ``divisor2``, 86400 and 7.
    y_a : float32 array (num_samples,) with the label-encoded first output of ``Extract_prefix``.
    y_t : float32 array (num_samples,) with the second output of ``Extract_prefix`` / ``divisor``.

    Raises
    ------
    ValueError
        If a prefix is longer than ``maxlen`` (it would otherwise be written to
        wrapped-around, i.e. wrong, time steps).
    """
    # NOTE(review): the exact meaning of the five feature lists and of `outputs` is defined
    # by the imported helpers, which are not visible here — presumably outputs[0] holds the
    # next activity and outputs[1] the time to the next event; confirm against the helpers.
    lines, lines_t, lines_t2, lines_t3, lines_t4 = Extract_trace_and_temporal_features(tab)
    prefixes, outputs = Extract_prefix(lines, lines_t, lines_t2, lines_t3, lines_t4)
    num_samples = len(prefixes[0])
    print('Vectorization...')
    num_acts = len(list_activities)
    num_features = num_acts + 5
    print('num features: {}'.format(num_features))
    X = np.zeros((num_samples, maxlen, num_features), dtype=np.float32)
    y_a = np.zeros((num_samples), dtype=np.float32)
    y_t = np.zeros((num_samples), dtype=np.float32)
    for i, sentence in enumerate(prefixes[0]):
        n_events = len(sentence)
        leftpad = maxlen - n_events
        if leftpad < 0:
            raise ValueError(
                "Prefix {} has {} events but maxlen is {}".format(i, n_events, maxlen))
        one_hot_act_matrix = encoder.transform(
            np.array(sentence).reshape((n_events, 1))).toarray()
        # View on the non-padded time steps of sample i; filled column block by column block.
        rows = X[i, leftpad:]
        rows[:, :num_acts] = one_hot_act_matrix
        rows[:, num_acts] = np.arange(1, n_events + 1)  # order of the activity in the sequence {1,...,maxlen}
        rows[:, num_acts + 1] = np.asarray(prefixes[1][i][:n_events], dtype=np.float64) / divisor
        rows[:, num_acts + 2] = np.asarray(prefixes[2][i][:n_events], dtype=np.float64) / divisor2
        rows[:, num_acts + 3] = np.asarray(prefixes[3][i][:n_events], dtype=np.float64) / 86400
        rows[:, num_acts + 4] = np.asarray(prefixes[4][i][:n_events], dtype=np.float64) / 7
        y_t[i] = outputs[1][i] / divisor

    if num_samples:
        # One vectorised, 1-D call instead of one 2-D (1, 1) call per sample.
        y_a[:] = le.transform(np.asarray(outputs[0][:num_samples]))
    return X, y_a, y_t

In [None]:
# Activity vocabulary of the full log and the resulting per-event feature width
# (one-hot activity columns + 5 numeric columns).
list_activities = list(tab_all["Activity"].unique())
num_activities = len(list_activities)
num_features = num_activities + 5

# One-hot encoder for the inputs, fitted on every activity of the whole dataset.
encoder = OneHotEncoder(handle_unknown='ignore')
encoder.fit(np.array(list_activities).reshape((num_activities, 1)))
# Label encoder that turns activity names into integer class ids for the targets.
le = preprocessing.LabelEncoder()
le.fit(list_activities)

# The longest trace of the full log fixes the padded sequence length.
lines, lines_t, lines_t2, lines_t3, lines_t4 = Extract_trace_and_temporal_features(tab_all)
maxlen = max(len(trace) for trace in lines)

# The two time normalisers are means over the training split only.
lines, lines_t, lines_t2, lines_t3, lines_t4 = Extract_trace_and_temporal_features(tab_train)
divisor = np.mean([value for case_values in lines_t for value in case_values])  # average time between events
print('divisor: {}'.format(divisor))
divisor2 = np.mean([value for case_values in lines_t2 for value in case_values])  # average time between current and first events
print('divisor2: {}'.format(divisor2))

# Vectorize train, validation and test data (in that order) with the shared settings.
(X_train, y_a_train, y_t_train), (X_valid, y_a_valid, y_t_valid), (X_test, y_a_test, y_t_test) = [
    Prepare_X_Y_next_activity(split, list_activities, divisor, divisor2, encoder, le, maxlen)
    for split in (tab_train, tab_valid, tab_test)
]

In [None]:
class EventLogData(Dataset):
    """Dataset of ``[x, y_a, y_t]`` samples built from three aligned tensors."""

    def __init__(self, input_x, output_a, output_t):
        # Inputs are kept as given; both targets become float32 columns of shape (N, 1).
        self.X = input_x
        self.y_a = output_a.to(torch.float32).reshape((len(output_a), 1))
        self.y_t = output_t.to(torch.float32).reshape((len(output_t), 1))

    def __len__(self):
        """Number of rows in the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as the list ``[x, y_a, y_t]``."""
        sample = [self.X[idx], self.y_a[idx], self.y_t[idx]]
        return sample

In [None]:
# Validation data is served as one batch containing every sample;
# test data is served one sample at a time. Neither is shuffled.
valid_loader = DataLoader(
    EventLogData(*(torch.tensor(arr) for arr in (X_valid, y_a_valid, y_t_valid))),
    batch_size=len(X_valid),
    shuffle=False)
test_loader = DataLoader(
    EventLogData(*(torch.tensor(arr) for arr in (X_test, y_a_test, y_t_test))),
    batch_size=1,
    shuffle=False)

## 3. Hyperparameter tuning with Ax package

In [None]:
# Creating the LSTM class
class LSTM(nn.Module):
    """Multi-task LSTM: a shared LSTM stack feeding an activity head and a time head.

    ``parameterization`` is a dict-like object read with ``.get``:
    ``"neurons"`` (hidden size, default 40), ``"shared_layers"`` (default 1),
    ``"layers"`` (layers of each task-specific stack, default 1) and
    ``"dropout"`` (default 0.2).

    The input width and the number of classes come from the notebook-level
    ``num_features`` and ``num_activities``.
    """

    def __init__(self, parameterization):
        super(LSTM, self).__init__()
        self.hidden_dim = parameterization.get("neurons", 40)
        self.num_shared_layers = parameterization.get("shared_layers", 1)
        self.num_layers = parameterization.get("layers", 1)
        self.droppout_prob = parameterization.get("dropout", 0.2)

        # nn.LSTM applies dropout only between stacked layers; passing a non-zero value
        # to a single-layer stack does nothing except raise a warning.
        shared_dropout = self.droppout_prob if self.num_shared_layers > 1 else 0.0
        head_dropout = self.droppout_prob if self.num_layers > 1 else 0.0

        self.lstm = nn.LSTM(input_size=num_features, hidden_size=self.hidden_dim,
                            num_layers=self.num_shared_layers, batch_first=True, dropout=shared_dropout)
        self.lstm_a = nn.LSTM(input_size=self.hidden_dim, hidden_size=self.hidden_dim,
                              num_layers=self.num_layers, batch_first=True, dropout=head_dropout)
        self.lstm_t = nn.LSTM(input_size=self.hidden_dim, hidden_size=self.hidden_dim,
                              num_layers=self.num_layers, batch_first=True, dropout=head_dropout)

        self.fc_a = nn.Linear(self.hidden_dim, num_activities)
        self.fc_t = nn.Linear(self.hidden_dim, 1)
        # Kept so callers can turn the returned logits into probabilities;
        # it is deliberately NOT applied inside forward().
        self.softmax = nn.Softmax(dim=1)

    # Progresses data across layers
    def forward(self, x):
        """Return ``(out_a, out_t)`` for a batch ``x`` of shape (B, T, num_features).

        ``out_a`` holds unnormalised class scores (logits) of shape (B, num_activities):
        ``nn.CrossEntropyLoss`` applies log-softmax itself, so applying a softmax here as
        well would squash the gradients. The arg-max class is the same for logits and
        probabilities. ``out_t`` is the time regression output of shape (B, 1).
        """
        batch_size = x.size(0)
        h0_shared, c0_shared = self.init_hidden(batch_size, self.num_shared_layers)
        h0_shared = h0_shared.to(device=x.device, dtype=x.dtype)
        c0_shared = c0_shared.to(device=x.device, dtype=x.dtype)

        h0, c0 = self.init_hidden(batch_size, self.num_layers)
        h0 = h0.to(device=x.device, dtype=x.dtype)
        c0 = c0.to(device=x.device, dtype=x.dtype)

        shared_lstm_output, _ = self.lstm(x, (h0_shared, c0_shared))
        # Both heads read the full shared output sequence and start from zero states.
        _, (last_hidden_a, _) = self.lstm_a(shared_lstm_output, (h0, c0))
        _, (last_hidden_t, _) = self.lstm_t(shared_lstm_output, (h0, c0))

        # Last layer's final hidden state feeds each output layer.
        out_a = self.fc_a(last_hidden_a[-1])
        out_t = self.fc_t(last_hidden_t[-1])
        return out_a, out_t

    def init_hidden(self, batch_size, num_layers):
        """Return zero hidden and cell states, each of shape (num_layers, B, H), on the CPU."""
        init_states = torch.zeros(num_layers, batch_size, self.hidden_dim)
        init_cells = torch.zeros(num_layers, batch_size, self.hidden_dim)
        return init_states, init_cells      #(num_layers, B, H)
    
    
def net_train(net, train_loader, valid_loader, parameters, dtype, device, early_stop_patience):
    """Train ``net`` on both tasks with early stopping and return the best snapshot.

    The loss of a batch is cross-entropy on the activity output plus L1 on the time
    output. Training runs for at most 100 epochs and stops once the mean validation
    loss has failed to improve ``early_stop_patience`` epochs in a row.

    Parameters
    ----------
    net : module whose forward returns ``(activity_output, time_output)``.
    train_loader, valid_loader : iterables yielding ``(inputs, labels_a, labels_t)``.
    parameters : dict-like; ``"lr"`` is the Adam learning rate (0.001 if absent).
    dtype, device : dtype/device for the network; training inputs are cast to both.
    early_stop_patience : number of consecutive non-improving epochs tolerated.

    Returns
    -------
    A deep copy of ``net`` taken at the epoch with the best validation loss.
    """
    net.to(dtype=dtype, device=device)
    min_delta = 0       # smallest decrease of the validation loss counted as an improvement
    num_epochs = 100    # Play around with epoch number
    activity_loss_fn = nn.CrossEntropyLoss()
    time_loss_fn = nn.L1Loss()
    optimizer = optim.Adam(net.parameters(), lr=parameters.get("lr", 0.001))

    stale_epochs = 0
    started_at = time.time()
    for epoch in range(num_epochs):
        # ---- one pass over the training data
        net.train()
        train_batches = 0
        train_loss_sum = 0
        for inputs, labels_a, labels_t in train_loader:
            inputs = inputs.to(dtype=dtype, device=device)
            labels_a = labels_a.to(device=device)
            labels_t = labels_t.to(device=device)

            optimizer.zero_grad()
            output_a, output_t = net(inputs)
            # Class labels arrive as a float column; the criterion needs a 1-D long tensor.
            loss_a = activity_loss_fn(output_a, labels_a.to(torch.long).squeeze(1))
            loss_t = time_loss_fn(output_t, labels_t)
            loss = loss_a + loss_t
            loss.backward()
            optimizer.step()

            train_loss_sum += loss.item()
            train_batches += 1

        # ---- validation pass (no gradients)
        net.eval()
        valid_batches = 0
        valid_loss_sum = 0
        with torch.no_grad():
            for inputs, targets_a, targets_t in valid_loader:
                inputs = inputs.to(device)
                targets_a = targets_a.to(device)
                targets_t = targets_t.to(device)
                yhat_a, yhat_t = net(inputs)
                batch_loss = (activity_loss_fn(yhat_a, targets_a.to(torch.long).squeeze(1))
                              + time_loss_fn(yhat_t, targets_t))
                valid_loss_sum += batch_loss.item()
                valid_batches += 1

        avg_training_loss = train_loss_sum / train_batches
        avg_validation_loss = valid_loss_sum / valid_batches
        print("Epoch: {}, Training loss : {}, Validation loss : {}".format(epoch,avg_training_loss,avg_validation_loss))

        # ---- keep the best model; the first epoch always counts as the best so far
        if epoch == 0 or best_loss - avg_validation_loss >= min_delta:
            best_loss = avg_validation_loss
            best_model = copy.deepcopy(net)
            stale_epochs = 0
        else:
            stale_epochs += 1

        # Early stopping
        if stale_epochs == early_stop_patience:
            print("Validation performance didn\'t improve for {} epochs. "
                            "Training stops.".format(early_stop_patience))
            break

    training_time = time.time() - started_at
    print("Training time:", training_time)
    return best_model


def lstm_direct_evaluate(net, data_loader, dtype, device):
    """Return the mean per-batch loss of ``net`` over ``data_loader``.

    The loss of a batch is cross-entropy on the activity output plus L1 on the time
    output. ``data_loader`` yields ``(inputs, targets_a, targets_t)``; inputs are cast
    to ``dtype`` and everything is moved to ``device``. ``net`` is put in eval mode and
    is expected to already live on ``device``.
    """
    activity_loss_fn = nn.CrossEntropyLoss()
    time_loss_fn = nn.L1Loss()
    net.eval()
    batches_seen = 0
    running_loss = 0
    with torch.no_grad():
        for inputs, targets_a, targets_t in data_loader:
            inputs = inputs.to(dtype=dtype, device=device)
            targets_a = targets_a.to(device=device)
            targets_t = targets_t.to(device=device)
            outputs_a, outputs_t = net(inputs)
            # Class labels arrive as a float column; the criterion needs a 1-D long tensor.
            class_ids = targets_a.to(torch.long).squeeze(1)
            running_loss += (activity_loss_fn(outputs_a, class_ids) + time_loss_fn(outputs_t, targets_t))
            batches_seen += 1
    return running_loss.item() / batches_seen


def train_evaluate(parameterization):
    """Objective for the hyperparameter search: train one model, return its validation loss.

    ``parameterization`` supplies the hyperparameters of the trial; ``"batchsize"``
    (default 32) is used here, the remaining keys are read by ``LSTM`` and ``net_train``.
    Uses the notebook-level training arrays, ``valid_loader``, ``dtype`` and ``device``.
    """
    # A fresh training loader per trial is what allows the batch size to be tuned.
    train_set = EventLogData(torch.tensor(X_train), torch.tensor(y_a_train), torch.tensor(y_t_train))
    trial_loader = DataLoader(train_set,
                              batch_size=parameterization.get("batchsize", 32),
                              shuffle=True)

    # Build and train the network of this trial (early stopping after 10 stale epochs).
    fitted_net = net_train(net=LSTM(parameterization),
                           train_loader=trial_loader,
                           valid_loader=valid_loader,
                           parameters=parameterization,
                           dtype=dtype,
                           device=device,
                           early_stop_patience=10)

    # Score the best snapshot on the validation data; lower is better.
    validation_loss = lstm_direct_evaluate(net=fitted_net,
                                           data_loader=valid_loader,
                                           dtype=dtype,
                                           device=device)
    return validation_loss

In [None]:
# dtype and device used for tuning, re-training and evaluation.
dtype = torch.float
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Ax managed loop: each of the 100 trials calls train_evaluate with one
# hyperparameter configuration and minimises the returned 'loss'.
best_parameters, values, experiment, model = optimize(
    parameters=[
        dict(name="neurons", type="choice", values=[40, 60, 80, 100], value_type="int"),
        dict(name="shared_layers", type="choice", values=[1, 2], value_type="int"),
        dict(name="layers", type="choice", values=[1, 2, 3], value_type="int"),
        dict(name="lr", type="range", bounds=[1e-4, 0.1], value_type="float", log_scale=True),
        dict(name="batchsize", type="choice", values=[16, 32, 64], value_type="int"),
        dict(name="dropout", type="range", bounds=[0, 0.5], value_type="float"),
    ],
    evaluation_function=train_evaluate,
    objective_name='loss',
    minimize=True,
    random_seed=123,
    total_trials=100,
)

print(best_parameters)
means, covariances = values
print(means)

In [None]:
# Pick the arm (trial configuration) whose recorded mean loss is the lowest.
data = experiment.fetch_data()
df = data.df
best_arm_name = df.loc[df['mean'] == df['mean'].min(), 'arm_name'].values[0]
best_arm = experiment.arms_by_name[best_arm_name]
best_arm

## 4. Re-train model with tuned hyperparameters

In [None]:
# Creating the LSTM class
class LSTM_model(nn.Module):
    """Multi-task LSTM with explicit hyperparameters (used for the final re-training).

    Parameters
    ----------
    hidden_dim : hidden size of every LSTM stack.
    num_shared_layers : number of layers of the shared stack.
    num_layers : number of layers of each task-specific stack.
    droppout_prob : dropout probability between stacked LSTM layers.

    The input width and the number of classes come from the notebook-level
    ``num_features`` and ``num_activities``.
    """

    def __init__(self, hidden_dim, num_shared_layers, num_layers, droppout_prob):
        super(LSTM_model, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_shared_layers = num_shared_layers
        self.num_layers = num_layers
        self.droppout_prob = droppout_prob

        # nn.LSTM applies dropout only between stacked layers; passing a non-zero value
        # to a single-layer stack does nothing except raise a warning.
        shared_dropout = self.droppout_prob if self.num_shared_layers > 1 else 0.0
        head_dropout = self.droppout_prob if self.num_layers > 1 else 0.0

        self.lstm = nn.LSTM(input_size=num_features, hidden_size=self.hidden_dim,
                            num_layers=self.num_shared_layers, batch_first=True, dropout=shared_dropout)
        self.lstm_a = nn.LSTM(input_size=self.hidden_dim, hidden_size=self.hidden_dim,
                              num_layers=self.num_layers, batch_first=True, dropout=head_dropout)
        self.lstm_t = nn.LSTM(input_size=self.hidden_dim, hidden_size=self.hidden_dim,
                              num_layers=self.num_layers, batch_first=True, dropout=head_dropout)

        self.fc_a = nn.Linear(self.hidden_dim, num_activities)
        self.fc_t = nn.Linear(self.hidden_dim, 1)
        # Kept so callers can turn the returned logits into probabilities;
        # it is deliberately NOT applied inside forward().
        self.softmax = nn.Softmax(dim=1)

    # Progresses data across layers
    def forward(self, x):
        """Return ``(out_a, out_t)`` for a batch ``x`` of shape (B, T, num_features).

        ``out_a`` holds unnormalised class scores (logits) of shape (B, num_activities):
        ``nn.CrossEntropyLoss`` applies log-softmax itself, so applying a softmax here as
        well would squash the gradients. The arg-max class is the same for logits and
        probabilities. ``out_t`` is the time regression output of shape (B, 1).
        """
        batch_size = x.size(0)
        h0_shared, c0_shared = self.init_hidden(batch_size, self.num_shared_layers)
        h0_shared = h0_shared.to(device=x.device, dtype=x.dtype)
        c0_shared = c0_shared.to(device=x.device, dtype=x.dtype)

        h0, c0 = self.init_hidden(batch_size, self.num_layers)
        h0 = h0.to(device=x.device, dtype=x.dtype)
        c0 = c0.to(device=x.device, dtype=x.dtype)

        shared_lstm_output, _ = self.lstm(x, (h0_shared, c0_shared))
        # Both heads read the full shared output sequence and start from zero states.
        _, (last_hidden_a, _) = self.lstm_a(shared_lstm_output, (h0, c0))
        _, (last_hidden_t, _) = self.lstm_t(shared_lstm_output, (h0, c0))

        # Last layer's final hidden state feeds each output layer.
        out_a = self.fc_a(last_hidden_a[-1])
        out_t = self.fc_t(last_hidden_t[-1])
        return out_a, out_t

    def init_hidden(self, batch_size, num_layers):
        """Return zero hidden and cell states, each of shape (num_layers, B, H), on the CPU."""
        init_states = torch.zeros(num_layers, batch_size, self.hidden_dim)
        init_cells = torch.zeros(num_layers, batch_size, self.hidden_dim)
        return init_states, init_cells      #(num_layers, B, H)

In [None]:
# The final training runs use the batch size of the best tuning arm.
batch_size = best_arm.parameters['batchsize']

train_loader = DataLoader(
    EventLogData(*(torch.tensor(arr) for arr in (X_train, y_a_train, y_t_train))),
    batch_size=batch_size,
    shuffle=True)

In [None]:
# Re-train the multi-task model `num_runs` times with the tuned hyperparameters.
# For every run the best checkpoint (lowest validation loss) and the loss history are
# written to `save_folder`.
# NOTE(review): the folder name ends in "_Tax_LSTM_direct" although this notebook trains the
# multi-task model used for recursive prediction — confirm this is the intended location.
save_folder = project_dir + '5_Output_files/Remaining_time_prediction/'+data_name+'_Tax_LSTM_direct'
os.makedirs(save_folder, exist_ok=True)  # torch.save / open do not create missing folders
hidden_dim = best_arm.parameters['neurons']
num_shared_layers = best_arm.parameters['shared_layers']
num_layers = best_arm.parameters['layers']
droppout_prob = best_arm.parameters['dropout']
lr_value = best_arm.parameters['lr']
min_delta = 0
num_epochs = 100
early_stop_patience = 10
num_runs = 5
for run in range(num_runs):
    print("Run: {}".format(run+1))
    model = LSTM_model(hidden_dim, num_shared_layers, num_layers, droppout_prob)
    model.to(dtype=dtype, device=device)
    # Same two-task objective as during tuning: cross-entropy (activity) + L1 (time).
    criterion_a = nn.CrossEntropyLoss()
    criterion_t = nn.L1Loss()
    optimizer = optim.Adam(model.parameters(), lr=lr_value)
    checkpoint_path = '{}/best_model_run_{}.pt'.format(save_folder,run+1)
    epochs_plt = []
    mae_plt = []          # mean combined training loss per epoch (name kept for compatibility)
    valid_loss_plt = []
    not_improved_count = 0
    best_loss = None
    # Train Network
    start_time = time.time()
    for epoch in range(num_epochs):
        model.train()
        training_loss = 0
        num_train = 0
        # The dataset yields three tensors per sample: inputs, activity label, time target.
        for inputs, labels_a, labels_t in train_loader:
            # move data to proper dtype and device
            inputs = inputs.to(dtype=dtype, device=device)
            labels_a = labels_a.to(device=device)
            labels_t = labels_t.to(device=device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward: the model returns one output per task
            output_a, output_t = model(inputs)
            loss = (criterion_a(output_a, labels_a.to(torch.long).squeeze(1))
                    + criterion_t(output_t, labels_t))
            # back prop
            loss.backward()
            # optimize
            optimizer.step()
            training_loss += loss.item()
            num_train += 1
        model.eval()
        with torch.no_grad():
            num_valid = 0
            validation_loss = 0
            for inputs, targets_a, targets_t in valid_loader:
                inputs = inputs.to(dtype=dtype, device=device)
                targets_a = targets_a.to(device=device)
                targets_t = targets_t.to(device=device)
                yhat_valid_a, yhat_valid_t = model(inputs)
                loss_valid = (criterion_a(yhat_valid_a, targets_a.to(torch.long).squeeze(1))
                              + criterion_t(yhat_valid_t, targets_t))
                validation_loss += loss_valid.item()
                num_valid += 1
        avg_training_loss = training_loss/num_train
        avg_validation_loss = validation_loss/num_valid
        print("Epoch: {}, Training loss : {}, Validation loss : {}".format(epoch,avg_training_loss,avg_validation_loss))
        epochs_plt.append(epoch+1)
        mae_plt.append(avg_training_loss)
        valid_loss_plt.append(avg_validation_loss)
        # The first epoch always counts as the best so far; later epochs must not be worse.
        if best_loss is None or best_loss - avg_validation_loss >= min_delta:
            torch.save(model.state_dict(), checkpoint_path)
            best_model = copy.deepcopy(model)
            best_loss = avg_validation_loss
            not_improved_count = 0
        else:
            not_improved_count += 1
        # Early stopping
        if not_improved_count == early_stop_patience:
            print("Validation performance didn\'t improve for {} epochs. "
                            "Training stops.".format(early_stop_patience))
            break
    training_time = time.time() - start_time
    print("Training time:", training_time)
    # One line per epoch: (epoch, training loss, validation loss), then the running time.
    filepath = '{}/Loss_'.format(save_folder)+data_name+'_run{}.txt'.format(run)
    with open(filepath, 'w') as file:
        for item in zip(epochs_plt,mae_plt,valid_loss_plt):
            file.write("{}\n".format(item))
        file.write("Running time: {}\n".format(training_time))

## 5. Evaluation

In [None]:
def Extract_trace_and_timed_features_rt(tab):
    """Split an event table into per-case traces with five timed features per event.

    ``tab`` must provide the columns ``'timestamp'`` (``"%Y/%m/%d %H:%M:%S"`` strings),
    ``'Case_ID'`` and ``'Activity'``, addressable by the labels ``0 .. len(tab)-1``.
    A new case starts whenever ``Case_ID`` differs from the one of the previous row.

    Returns six lists with one entry per case, each entry being a per-event list:
    activities, seconds since the previous event of the case, seconds since the first
    event of the case, seconds since midnight, weekday (0 = Monday), and the event
    time as a ``datetime``.
    """
    # Per-case accumulators for the whole log ...
    lines, lines_t, lines_t2, lines_t3, lines_t4, lines_t5 = [], [], [], [], [], []
    # ... and the buffers of the case currently being read.
    line, times, times2, times3, times4, times5 = [], [], [], [], [], []

    lastcase = ''
    seen_event = False
    casestarttime = None
    lasteventtime = None
    for row in range(len(tab)):
        parsed = datetime.datetime.strptime(tab['timestamp'][row], "%Y/%m/%d %H:%M:%S")
        t = time.mktime(parsed.timetuple())
        case_id = tab['Case_ID'][row]
        if case_id != lastcase:
            # A new case begins: its first event is both case start and "previous event".
            casestarttime = lasteventtime = t
            lastcase = case_id
            if seen_event:
                # Store the case that just ended.
                lines.append(line)
                lines_t.append(times)
                lines_t2.append(times2)
                lines_t3.append(times3)
                lines_t4.append(times4)
                lines_t5.append(times5)
            line, times, times2, times3, times4, times5 = [], [], [], [], [], []

        line.append(tab['Activity'][row])
        current_time = datetime.datetime.fromtimestamp(t)
        midnight = current_time.replace(hour=0, minute=0, second=0, microsecond=0)
        times.append(t - lasteventtime)
        times2.append(t - casestarttime)
        times3.append((current_time - midnight).total_seconds())
        times4.append(current_time.weekday())  # day of the week
        times5.append(current_time)
        lasteventtime = t
        seen_event = True

    # The last case is never followed by a case change, so store it here.
    lines.append(line)
    lines_t.append(times)
    lines_t2.append(times2)
    lines_t3.append(times3)
    lines_t4.append(times4)
    lines_t5.append(times5)
    return lines, lines_t, lines_t2, lines_t3, lines_t4, lines_t5


def Extract_prefix_remaining_time(lines, lines_t, lines_t2, lines_t3, lines_t4, lines_t5):
    """Enumerate the prefixes of every case together with their remaining time.

    For a case with ``n`` events, the prefixes of length ``2 .. n-1`` are produced.
    All six per-case feature lists are cut to the same prefix length.

    Returns, in this order: the six lists of prefixes (activities and the five timed
    features), the last activity of the case for each prefix, and the remaining time
    of each prefix, computed as the difference between the ``lines_t2`` value of the
    last event of the case and that of the last event of the prefix.
    """
    sentences, sentences_t, sentences_t2 = [], [], []
    sentences_t3, sentences_t4, sentences_t5 = [], [], []
    end_ope, end_ope_t = [], []
    prefix_buckets = (sentences, sentences_t, sentences_t2,
                      sentences_t3, sentences_t4, sentences_t5)

    for case_features in zip(lines, lines_t, lines_t2, lines_t3, lines_t4, lines_t5):
        activities, since_start = case_features[0], case_features[2]
        for cut in range(2, len(activities)):
            # Same cut applied to the activities and to each timed feature.
            for bucket, feature in zip(prefix_buckets, case_features):
                bucket.append(feature[0:cut])
            end_ope.append(activities[-1])
            end_ope_t.append(since_start[-1] - since_start[cut - 1])
    return sentences, sentences_t, sentences_t2, sentences_t3, sentences_t4, sentences_t5, end_ope, end_ope_t

def encode(sentence, sentence_t, sentence_t5, num_features, maxlen, encoder, divisor, divisor2):
    """Encode one prefix as a left-padded float32 array of shape (1, maxlen, num_features).

    Parameters
    ----------
    sentence : activity labels of the prefix.
    sentence_t : per-event time since the previous event (same unit as ``divisor``).
    sentence_t5 : per-event ``datetime`` of the event.
    num_features : feature width; the last five columns hold the numeric features.
    maxlen : number of time steps; unused leading steps stay zero.
    encoder : fitted one-hot encoder; ``transform`` must return an object with ``toarray()``.
    divisor, divisor2 : normalisers for the time since the previous event and for the
        cumulative time since the start of the prefix.
    """
    X = np.zeros((1, maxlen, num_features), dtype=np.float32)
    n_events = len(sentence)
    leftpad = maxlen - n_events
    first_numeric = num_features - 5
    # Time since the first event, rebuilt as the running sum of the inter-event times.
    elapsed = np.cumsum(sentence_t)
    one_hot = encoder.transform(np.array(sentence).reshape((n_events, 1))).toarray()
    for pos in range(n_events):
        stamp = sentence_t5[pos]
        since_midnight = stamp - stamp.replace(hour=0, minute=0, second=0, microsecond=0)
        row = X[0, pos + leftpad]          # view on the time step being filled
        row[:first_numeric] = one_hot[pos, :]
        row[first_numeric] = pos + 1       # 1-based position in the prefix
        row[first_numeric + 1] = sentence_t[pos] / divisor
        row[first_numeric + 2] = elapsed[pos] / divisor2
        row[first_numeric + 3] = since_midnight.seconds / 86400
        row[first_numeric + 4] = stamp.weekday() / 7
    return X


def remaining_time_pred_recursive_MP(input_prefixes, model,
                                  num_features, maxlen, encoder, divisor, divisor2):
    """Predict the remaining time of every prefix by rolling the model forward.

    For each prefix the model repeatedly predicts the next activity and the time to it;
    the prediction is appended to the prefix and fed back, until the end activity
    (notebook-level ``end_act``) is predicted or the prefix reaches ``maxlen`` events.
    The remaining time is the span between the last real event and the last predicted
    event time. Prefixes are processed concurrently by a thread pool.

    Parameters
    ----------
    input_prefixes : list of ``[activities, inter_event_times, event_datetimes, true_remaining_time]``;
        the entries are deep-copied, never modified.
    model : network returning ``(activity_scores, normalised_next_time)``.
    num_features, maxlen, encoder, divisor, divisor2 : forwarded to ``encode``;
        ``divisor`` also converts the predicted time back to seconds.

    Returns
    -------
    Three aligned lists: predicted remaining times (seconds), the ``true_remaining_time``
    values passed in, and the prefix lengths. The order is the order in which the
    predictions completed, not the order of ``input_prefixes``.

    Also relies on the notebook-level ``device``, ``le`` and ``printProgress``.
    """
    model.to(device)
    model.eval()

    def model_predict(input_prefix):
        sentence, sentence_t, sentence_t5, end_ope_t = copy.deepcopy(input_prefix)
        prefix_size = len(sentence)
        # If the prefix already fills maxlen no step can run: predicted remaining time is 0.
        next_t = sentence_t5[-1]
        # Gradient mode is per thread, so it has to be disabled inside the worker.
        with torch.no_grad():
            for _ in range(prefix_size, maxlen):
                X_test = encode(sentence, sentence_t, sentence_t5, num_features, maxlen, encoder, divisor, divisor2)
                X_test = torch.from_numpy(X_test).to(device)
                y_a_predict, y_t_predict = model(X_test)
                # Most likely class; argmax is well defined even when two scores tie.
                numeric_class = torch.argmax(y_a_predict, dim=1).item()
                next_a = le.inverse_transform([numeric_class])[0]
                delta_t = y_t_predict.item()*divisor   # model output is normalised -> seconds
                next_t = sentence_t5[-1] + datetime.timedelta(seconds = delta_t)
                if next_a == end_act:
                    break
                sentence.append(next_a)
                # `encode` divides the inter-event times by `divisor`, so the value fed
                # back must be in seconds, not the already-normalised model output.
                sentence_t.append(delta_t)
                sentence_t5.append(next_t)
        rt = (next_t - sentence_t5[prefix_size-1]).total_seconds()
        return rt, end_ope_t, prefix_size

    list_rts = []
    list_end_ope_t = []
    list_prefixe_size = []
    l = len(input_prefixes)
    nb_processes = multiprocessing.cpu_count()
    pool = multiprocessing.dummy.Pool(processes = nb_processes)
    try:
        for i, data in enumerate(pool.imap_unordered(model_predict, input_prefixes)):
            list_rts.append(data[0])
            list_end_ope_t.append(data[1])
            list_prefixe_size.append(data[2])
            # i + 1 results are in at this point, so the bar reaches 100% on the last one.
            printProgress(i + 1, l, prefix='Predicting from {}'.format(l) + ' samples',
                  suffix='already completed {}'.format(i + 1),
                  decimals=0, barLength=40)
    except KeyboardInterrupt:
        print("got Ctrl+C")
    finally:
        pool.terminate()
        pool.join()
    return list_rts, list_end_ope_t, list_prefixe_size


def printProgress (iteration, total, prefix = '', suffix = '', decimals = 1, barLength = 100, fill = '█'):
    """
    Call in a loop to create terminal progress bar
    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int)
        prefix      - Optional  : prefix string (Str)
        suffix      - Optional  : suffix string (Str)
        decimals    - Optional  : positive number of decimals in percent complete (Int)
        barLength   - Optional  : character length of bar (Int)
        fill        - Optional  : character used for the completed part of the bar (Str)
    """
    done_cells = int(barLength * iteration // total)
    todo_cells = barLength - done_cells
    bar = ''.join([fill * done_cells, '-' * todo_cells])
    percent = "{:.{prec}f}".format(100 * (iteration / float(total)), prec=decimals)
    # The line starts with a carriage return and has no newline, so every call
    # overwrites the previous bar.
    rendered = '\r%s |%s| %s%s %s' % (prefix, bar, percent, '%', suffix)
    sys.stdout.write(rendered)
    sys.stdout.flush()

In [None]:
# Test data: traces with timed features, then every prefix with its true remaining time.
lines, lines_t, lines_t2, lines_t3, lines_t4, lines_t5 = Extract_trace_and_timed_features_rt(tab_test)
(sentences, sentences_t, sentences_t2, sentences_t3,
 sentences_t4, sentences_t5, end_ope, end_ope_t) = Extract_prefix_remaining_time(
    lines, lines_t, lines_t2, lines_t3, lines_t4, lines_t5)

# One record per prefix: [activities, inter-event times, event datetimes, true remaining time].
list_inputs = [
    [activities.copy(), gaps.copy(), stamps.copy(), remaining]
    for activities, gaps, stamps, remaining in zip(sentences, sentences_t, sentences_t5, end_ope_t)
]

In [None]:
# Number of test prefixes for each prefix length.
num_samples_dict = {}
for sentence in sentences:
    key = len(sentence)
    num_samples_dict[key] = num_samples_dict.get(key, 0) + 1

In [None]:
# Evaluate the checkpoint of every training run on the test prefixes.
num_runs = 5
# The model constructor needs the number of shared layers; it is read here from the
# best tuning arm so that this cell does not rely on another cell having defined it.
num_shared_layers = best_arm.parameters['shared_layers']
list_mae_time = []     # overall MAE in days, one value per run
list_mape_time = []    # overall MAPE in percent, one value per run
err_total_dict = {}    # prefix length -> list of [MAPE, MAE] pairs, one pair per run
for run in range(num_runs):
    print("Run: {}".format(run+1))
    trained_model = LSTM_model(hidden_dim, num_shared_layers, num_layers, droppout_prob)
    trained_model.load_state_dict(torch.load('{}/best_model_run_{}.pt'.format(save_folder,run+1),
                                         map_location=torch.device(device)))
    list_rts, list_rts_true, list_prefixe_size = remaining_time_pred_recursive_MP(list_inputs, trained_model,
                                  num_features, maxlen, encoder, divisor, divisor2)
    tab_results = pd.DataFrame({'Prefix length':list_prefixe_size, 'Y_predicted': list_rts, 'Y_true': list_rts_true})
    # Errors per prefix length: MAE is converted from seconds to days (86400 s per day).
    # NOTE(review): MAPE divides by the true remaining time; a true value of 0 yields
    # inf/nan — confirm whether such prefixes can occur in the data.
    for key in list(tab_results['Prefix length'].unique()):
        filtered_tab = tab_results[tab_results['Prefix length']==key]
        mae_err = metrics.mean_absolute_error(filtered_tab['Y_predicted'], filtered_tab['Y_true'])/86400
        mape_err = np.mean(np.abs((filtered_tab['Y_true'] - filtered_tab['Y_predicted'])/filtered_tab['Y_true']))*100
        err_total_dict.setdefault(key, []).append([mape_err, mae_err])
    # Errors over all prefixes of this run.
    y_pred = np.array(list_rts)
    y_true = np.array(list_rts_true)
    mae_t = metrics.mean_absolute_error(list_rts, list_rts_true)/86400
    list_mae_time.append(round(mae_t,2))
    mape_t = np.mean(np.abs((y_true - y_pred)/y_true))*100
    list_mape_time.append(round(mape_t,2))

In [None]:
# Aggregate the per-run errors of every prefix length into mean and standard deviation.
list_prefix_len = []
list_num_samples = []
list_mape_err = []
list_mape_std = []
list_mae_err = []
list_mae_std = []
for key, value in err_total_dict.items():
    run_errors = np.array(value)                 # one row per run, columns: [MAPE, MAE]
    column_means = run_errors.mean(axis = 0)
    column_stds = run_errors.std(axis = 0)
    list_prefix_len.append(key)
    list_num_samples.append(num_samples_dict[key])
    list_mape_err.append(round(column_means[0], 3))
    list_mape_std.append(round(column_stds[0], 3))
    list_mae_err.append(round(column_means[1], 3))
    list_mae_std.append(round(column_stds[1], 3))
tab_result = pd.DataFrame({"Prefix length":list_prefix_len, "Num samples": list_num_samples,
                           "MAPE(%)":list_mape_err, "MAPE std": list_mape_std,
                           "MAE(days)": list_mae_err, "MAE std": list_mae_std}).sort_values('Prefix length')
tab_result

In [None]:
# Sample-weighted average MAE over the prefix lengths that have at least 20 test samples.
tab = tab_result.loc[tab_result["Num samples"] >= 20]
sum(tab["Num samples"] * tab["MAE(days)"]) / sum(tab["Num samples"])

In [None]:
# Persist the per-prefix-length evaluation table (without the DataFrame index).
tab_result.to_csv(
    project_dir + "4_Outputs/Evaluation/" + data_name + "_Tax_LSTM_recursive_eval.csv",
    index=False)