In [19]:
import os
import argparse
import pandas as pd
import numpy as np
import pickle
from utils_test import *

In [20]:
from jellyfish._jellyfish import damerau_levenshtein_distance
import distance
from sklearn import metrics

In [21]:
from keras.models import load_model

In [22]:
name = 'helpdesk'
args = {
    'inputdir': '../input/{}/'.format(name),   
    'outputdir': './output_files/{}/'.format(name),
    'modelname': 'model_24-1.49.h5',
}

args = argparse.Namespace(**args)

In [23]:
test = pd.read_csv(args.inputdir+'test.csv')
test = transformDf(test)

In [24]:
with open(args.inputdir+'parameters.pkl', "rb") as f:
    maxlen = pickle.load(f)
    num_features = pickle.load(f)
    chartoindice = pickle.load(f)
    targetchartoindice = pickle.load(f)
    divisor = pickle.load(f)
    divisor2 = pickle.load(f)

In [25]:
with open(args.inputdir+'preprocessed_data.pkl', "rb") as f:
    X = pickle.load(f)
    y_a = pickle.load(f)
    y_t = pickle.load(f)
    X_test = pickle.load(f)
    y_a_test = pickle.load(f)
    y_t_test = pickle.load(f)

# Utils

In [26]:
model = load_model(args.outputdir+args.modelname)

In [28]:
test_groupByCase = test.groupby(['CaseID'])

#get features all data
sentences, sentences_t, sentences_t2, sentences_t3, sentences_t4 = getFeature(test_groupByCase)

#get output all data
next_chars, next_chars_t, next_chars_t2, next_chars_t3, next_chars_t4 = getOutput(test_groupByCase)

# Check longest case --> suffix: 2 to 7
test_len = findLongestLength(test_groupByCase)
test_len

13

In [29]:
def getSuffix(suffix, sentences, sentences_t, sentences_t2, sentences_t3, sentences_t4, next_chars, next_chars_t):
    sentences_2 = []
    sentences_t_2 = []
    sentences_t2_2 = []
    sentences_t3_2 = []
    sentences_t4_2 = []
    next_chars_2 = []
    next_chars_t_2 = []

    for i in range(len(sentences)):
        if len(sentences[i]) == suffix and next_chars[i] != 'EOS':
            sentences_2.append(sentences[i])
            sentences_t_2.append(sentences_t[i])
            sentences_t2_2.append(sentences_t2[i])
            sentences_t3_2.append(sentences_t3[i])
            sentences_t4_2.append(sentences_t4[i])
            next_chars_2.append(next_chars[i])
            next_chars_t_2.append(next_chars_t[i])
    return sentences_2, sentences_t_2, sentences_t2_2, sentences_t3_2, sentences_t4_2, next_chars_2, next_chars_t_2

In [30]:
def evalAct(true_label, pred_prob, targetchartoindice):
    #for log-loss: get probabilities
    gt_a = one_hot_encode(true_label, targetchartoindice)
    log_loss = metrics.log_loss(gt_a, pred_prob)
    print('Log-loss: {}'.format(log_loss))
    
    #for accuracy: get labels
    gt_a_label = true_label
    pred_a_label = getLabel(pred_prob, targetchartoindice)
    acc = metrics.accuracy_score(gt_a_label, pred_a_label)
    print('Accuracy: {}%'.format(acc*100))
    acc_3 = get_top3_accuracy(pred_prob, gt_a_label, targetchartoindice)
    print('Top 3 accuracy: {}%'.format(acc_3*100))

In [31]:
def evalTime(true_time, pred_time, divisor):
    #get time
    gt_t = true_time
    pred_t = inverseTime(pred_time, divisor)
    
    mse = metrics.mean_squared_error(gt_t, pred_t)
    mae = metrics.mean_absolute_error(gt_t, pred_t)
    median = metrics.median_absolute_error(gt_t, pred_t)
    
    print('Mean Squared Error: {0}s\t| {1} days'.format(mse, mse/86400))
    print('Mean Absolute Error: {0}s\t| {1} days'.format(mae, mae/86400))
    print('Median Absolute Error: {0}s\t| {1} days'.format(median, median/86400))

In [32]:
# DL distance
#distance.nlevenshtein(gt_a_label, pred_a_label) #0.1731066460587326

#damerau_levenshtein_distance(gt_a_label, pred_a_label) #784

#distance.jaccard(gt_a_label, pred_a_label) #0.4444444444444444

# Evaluation

In [33]:
max_suffix = test_len-2

In [34]:
for i in range(2, max_suffix+1):
    print('----------Suffix {}------------'.format(i))
    sentences_2, sentences_t_2, sentences_t2_2, sentences_t3_2, sentences_t4_2, next_chars_2, next_chars_t_2 = getSuffix(i, sentences, sentences_t, sentences_t2, sentences_t3, sentences_t4, next_chars, next_chars_t)
    X_test = vectorizeInput(sentences_2, sentences_t_2, sentences_t2_2, sentences_t3_2, sentences_t4_2, 
                        maxlen, num_features, chartoindice, 
                        divisor, divisor2, divisor3=86400, divisor4=7)
    pred = model.predict(X_test, verbose=0)
    pred_a = pred[0]
    pred_t = pred[1]
    print('Evaluation of Activity')
    evalAct(next_chars_2, pred[0], targetchartoindice)
    print('Evaluation of Time')
    evalTime(next_chars_t_2, pred_t, divisor)
    print('\n')
    #Add gt and pred of each suffix
    if i == 2:
        full_gt_a = next_chars_2
        full_pred_a = pred_a
        full_gt_t = next_chars_t_2
        full_pred_t = pred_t
    else:
        full_gt_a += next_chars_2
        full_pred_a = np.concatenate((full_pred_a, pred_a), axis=0)
        full_gt_t += next_chars_t_2
        full_pred_t = np.concatenate((full_pred_t, pred_t), axis=0)

----------Suffix 2------------
Evaluation of Activity
Log-loss: 0.6828893884333471
Accuracy: 76.75%
Top 3 accuracy: 98.66666666666667%
Evaluation of Time
Mean Squared Error: 365781689848.81305s	| 4233584.373250151 days
Mean Absolute Error: 292164.1585622915s	| 3.3815296129894845 days
Median Absolute Error: 35396.03515625s	| 0.4096763328269676 days


----------Suffix 3------------
Evaluation of Activity
Log-loss: 1.1953289699688385
Accuracy: 56.62921348314607%
Top 3 accuracy: 98.42696629213484%
Evaluation of Time
Mean Squared Error: 507340565831.0474s	| 5871997.289711197 days
Mean Absolute Error: 421881.4451555231s	| 4.882887096707443 days
Median Absolute Error: 241324.93359375s	| 2.7931126573350693 days


----------Suffix 4------------
Evaluation of Activity
Log-loss: 0.9100996918021105
Accuracy: 66.66666666666666%
Top 3 accuracy: 99.48717948717949%
Evaluation of Time
Mean Squared Error: 250306428253.47235s	| 2897065.1418225965 days
Mean Absolute Error: 228712.23642403775s	| 2.64713236

In [35]:
evalAct(full_gt_a, full_pred_a, targetchartoindice)

Log-loss: 0.8338861819235183
Accuracy: 70.84796788760663%
Top 3 accuracy: 98.74560963371802%


In [36]:
evalTime(full_gt_t, full_pred_t, divisor)

Mean Squared Error: 387628924245.8491s	| 4486445.882475105 days
Mean Absolute Error: 313622.40525885794s	| 3.6298889497553 days
Median Absolute Error: 53174.65234375s	| 0.6154473650896991 days


In [37]:
len(full_gt_a)

1993

# Test single case

## Predict

In [None]:
sentences_2, sentences_t_2, sentences_t2_2, sentences_t3_2, sentences_t4_2, next_chars_2, next_chars_t_2 =  getSuffix(2, sentences, sentences_t, sentences_t2, sentences_t3, sentences_t4, next_chars, next_chars_t)

In [None]:
len(sentences_2), len(next_chars_2)

In [None]:
X_test = vectorizeInput(sentences_2, sentences_t_2, sentences_t2_2, sentences_t3_2, sentences_t4_2, 
                        maxlen, num_features, chartoindice, 
                        divisor, divisor2, divisor3=86400, divisor4=7)

In [None]:
pred = model.predict(X_test, verbose=0)
pred_a = pred[0]
pred_t = pred[1]

In [None]:
pred[0].shape, pred[1].shape

## Evaluate

### Activity

In [None]:
evalAct(next_chars_2, pred[0], targetchartoindice)

### Time

In [None]:
evalTime(next_chars_t_2, pred_t, divisor)