In [1]:
import os
import argparse
import pandas as pd
import numpy as np
import pickle
from utils_test import *

In [2]:
from jellyfish._jellyfish import damerau_levenshtein_distance
import distance
from sklearn import metrics

In [3]:
from keras.models import load_model
#from theano.ifelse import ifelse #added this

Using Theano backend.


In [4]:
name = 'helpdesk'
#name = 'bpi_12_w'
args = {
    'inputdir': '../input/{}/'.format(name),   
    'outputdir': './output_files/{}/'.format(name),
    'modelname': 'model_17-1.51.h5',
}

args = argparse.Namespace(**args)

In [5]:
test = pd.read_csv(args.inputdir+'test.csv')
test = transformDf(test)

In [6]:
with open(args.inputdir+'parameters.pkl', "rb") as f:
    maxlen = pickle.load(f)
    num_features = pickle.load(f)
    chartoindice = pickle.load(f)
    targetchartoindice = pickle.load(f)
    divisor = pickle.load(f)
    divisor2 = pickle.load(f)

In [7]:
with open(args.inputdir+'preprocessed_data.pkl', "rb") as f:
    X = pickle.load(f)
    y_a = pickle.load(f)
    y_t = pickle.load(f)
    X_test = pickle.load(f)
    y_a_test = pickle.load(f)
    y_t_test = pickle.load(f)

# Utils

In [8]:
model = load_model(args.outputdir+args.modelname)

In [9]:
test_groupByCase = test.groupby(['CaseID'])

#get features all data
sentences, sentences_t, sentences_t2, sentences_t3, sentences_t4 = getFeature(test_groupByCase)

#get output all data
next_chars, next_chars_t, next_chars_t2, next_chars_t3, next_chars_t4 = getOutput(test_groupByCase)

# Check longest case --> suffix: 2 to 7
test_len = findLongestLength(test_groupByCase)

In [10]:
def getSuffix(suffix, sentences, sentences_t, sentences_t2, sentences_t3, sentences_t4, next_chars, next_chars_t):
    sentences_2 = []
    sentences_t_2 = []
    sentences_t2_2 = []
    sentences_t3_2 = []
    sentences_t4_2 = []
    next_chars_2 = []
    next_chars_t_2 = []

    for i in range(len(sentences)):
        if len(sentences[i]) == suffix and next_chars[i] != 'EOS':
            sentences_2.append(sentences[i])
            sentences_t_2.append(sentences_t[i])
            sentences_t2_2.append(sentences_t2[i])
            sentences_t3_2.append(sentences_t3[i])
            sentences_t4_2.append(sentences_t4[i])
            next_chars_2.append(next_chars[i])
            next_chars_t_2.append(next_chars_t[i])
    return sentences_2, sentences_t_2, sentences_t2_2, sentences_t3_2, sentences_t4_2, next_chars_2, next_chars_t_2

In [11]:
def evalAct(true_label, pred_prob, targetchartoindice):
    #for log-loss: get probabilities
    gt_a = one_hot_encode(true_label, targetchartoindice)
    log_loss = metrics.log_loss(gt_a, pred_prob)
    print('Log-loss: {}'.format(log_loss))
    
    #for accuracy: get labels
    gt_a_label = true_label
    pred_a_label = getLabel(pred_prob, targetchartoindice)
    acc = metrics.accuracy_score(gt_a_label, pred_a_label)
    print('Accuracy: {}%'.format(acc*100))
    acc_3 = get_top3_accuracy(pred_prob, gt_a_label, targetchartoindice)
    print('Top 3 accuracy: {}%'.format(acc_3*100))

In [12]:
def evalTime(true_time, pred_time, divisor):
    #get time
    gt_t = true_time
    pred_t = inverseTime(pred_time, divisor)
    
    mse = metrics.mean_squared_error(gt_t, pred_t)
    mae = metrics.mean_absolute_error(gt_t, pred_t)
    median = metrics.median_absolute_error(gt_t, pred_t)
    
    print('Mean Squared Error: {0}s\t| {1} days'.format(mse, mse/86400))
    print('Mean Absolute Error: {0}s\t| {1} days'.format(mae, mae/86400))
    print('Median Absolute Error: {0}s\t| {1} days'.format(median, median/86400))

In [13]:
# DL distance
#distance.nlevenshtein(gt_a_label, pred_a_label) #0.1731066460587326

#damerau_levenshtein_distance(gt_a_label, pred_a_label) #784

#distance.jaccard(gt_a_label, pred_a_label) #0.4444444444444444

# Evaluation

In [14]:
max_suffix = test_len-2

In [16]:
for i in range(2, max_suffix+1):
    print('----------Suffix {}------------'.format(i))
    sentences_2, sentences_t_2, sentences_t2_2, sentences_t3_2, sentences_t4_2, next_chars_2, next_chars_t_2 = getSuffix(i, sentences, sentences_t, sentences_t2, sentences_t3, sentences_t4, next_chars, next_chars_t)
    X_test = vectorizeInput(sentences_2, sentences_t_2, sentences_t2_2, sentences_t3_2, sentences_t4_2, 
                        maxlen, num_features, chartoindice, 
                        divisor, divisor2, divisor3=86400, divisor4=7)
    pred = model.predict(X_test, verbose=0)
    pred_a = pred[0]
    pred_t = pred[1]
    print('Evaluation of Activity')
    evalAct(next_chars_2, pred[0], targetchartoindice)
    print('Evaluation of Time')
    evalTime(next_chars_t_2, pred_t, divisor)
    print('\n')
    #Add gt and pred of each suffix
    if i == 2:
        full_gt_a = next_chars_2
        full_pred_a = pred_a
        full_gt_t = next_chars_t_2
        full_pred_t = pred_t
    else:
        full_gt_a += next_chars_2
        full_pred_a = np.concatenate((full_pred_a, pred_a), axis=0)
        full_gt_t += next_chars_t_2
        full_pred_t = np.concatenate((full_pred_t, pred_t), axis=0)

----------Suffix 2------------
Evaluation of Activity
Log-loss: 0.6857187334448099
Accuracy: 76.41666666666667%
Top 3 accuracy: 98.66666666666667%
Evaluation of Time
Mean Squared Error: 368701213405.48596s	| 4267375.155156087 days
Mean Absolute Error: 293230.35363421123s	| 3.3938698337292967 days
Median Absolute Error: 30161.84765625s	| 0.349095458984375 days


----------Suffix 3------------
Evaluation of Activity
Log-loss: 1.200921276714025
Accuracy: 51.23595505617977%
Top 3 accuracy: 98.65168539325843%
Evaluation of Time
Mean Squared Error: 515213611476.3365s	| 5963120.503198339 days
Mean Absolute Error: 418199.2392852612s	| 4.840268973209041 days
Median Absolute Error: 237434.1796875s	| 2.748080783420139 days


----------Suffix 4------------
Evaluation of Activity
Log-loss: 0.9436626728528585
Accuracy: 67.17948717948717%
Top 3 accuracy: 98.97435897435898%
Evaluation of Time
Mean Squared Error: 248391913882.45148s	| 2874906.4106765217 days
Mean Absolute Error: 220775.8820431538s	| 2.

In [17]:
evalAct(full_gt_a, full_pred_a, targetchartoindice)

Log-loss: 0.8372215497911695
Accuracy: 69.54340190667335%
Top 3 accuracy: 98.74560963371802%


In [18]:
evalTime(full_gt_t, full_pred_t, divisor)

Mean Squared Error: 391173722339.7733s	| 4527473.638191821 days
Mean Absolute Error: 313173.9219692334s	| 3.624698170940201 days
Median Absolute Error: 42856.27734375s	| 0.496021728515625 days


In [19]:
len(full_gt_a)

1993

# Test single case

## Predict

In [18]:
sentences_2, sentences_t_2, sentences_t2_2, sentences_t3_2, sentences_t4_2, next_chars_2, next_chars_t_2 =  getSuffix(10, sentences, sentences_t, sentences_t2, sentences_t3, sentences_t4, next_chars, next_chars_t)

In [19]:
len(sentences_2), len(next_chars_2), len(sentences)

(3, 3, 4529)

In [20]:
X_test = vectorizeInput(sentences_2, sentences_t_2, sentences_t2_2, sentences_t3_2, sentences_t4_2, 
                        maxlen, num_features, chartoindice, 
                        divisor, divisor2, divisor3=86400, divisor4=7)

In [21]:
pred = model.predict(X_test, verbose=0)
pred_a1 = pred[0]
pred_t = pred[1]

In [22]:
pred[0].shape, pred[1].shape

((3, 10), (3, 1))

## Evaluate

### Activity

In [23]:
evalAct(next_chars_2, pred[0], targetchartoindice)

Log-loss: 0.156090296494
Accuracy: 100.0%
Top 3 accuracy: 100.0%


### Time

In [24]:
evalTime(next_chars_t_2, pred_t, divisor)

Mean Squared Error: 80774949448.4s	| 934895.248246 days
Mean Absolute Error: 215542.325734s	| 2.49470284415 days
Median Absolute Error: 193860.84375s	| 2.24375976563 days
