In [1]:
import numpy as np
import pandas
import pm4py
from matplotlib import pyplot as plt
from sklearn.mixture import GaussianMixture
import scipy.stats as stats
import ot
import os
from tqdm import tqdm
import collections
import matplotlib.dates as md
import importlib
import pickle
import random
import math
import CRPS.CRPS as pscore
import datetime

# Do not truncate the column list when a DataFrame is displayed.
pandas.set_option('display.max_columns', None)

# The project packages are not installed; they are made importable by extending
# sys.path with paths relative to the current working directory. Each append
# must run before the import that follows it.
import sys
sys.path.append('../../TaskExecutionTimeMining/')
# NOTE(review): star-imports hide which module provides names used later in the
# notebook (e.g. ArtificialModel, SampleOutcomes_DRBART_Normal_A_R_S_RC) —
# consider importing those names explicitly.
from event_log_transformer import *
from artificial_model import *

sys.path.append('../../Evaluation/')
from normal_evaluation.drbart_evaluation import *

In [2]:
# Read the pickled training event log.
# NOTE(review): pickle.load can execute arbitrary code — only open trusted files.
with open('../../../data/AR/transformed_event_logs/AR_train.pickle', 'rb') as train_file:
    train_event_log = pickle.load(train_file)

# Store the case identifier column as strings.
train_event_log['case:concept:name'] = train_event_log['case:concept:name'].astype(str)

# Label sets used further down in the notebook.
known_resources = [
    '1',
    'Clark',
    'Jane',
    'Joe',
    'Karsten',
]
known_activities = [
    'DIAGNOSIS',
    'QUALITY_CONTROL',
    'REPAIR',
]

In [3]:
# Settings forwarded to ConductEvaluation as `n` and `n_processes`.
# NOTE(review): presumably the sample count and the worker-process count — confirm
# against conduct_evaluation.
N = 10_000
n_processes = 50

import conduct_evaluation
def get_pscores(likelihoods):
    """Compute one `pscore` value per case.

    Parameters
    ----------
    likelihoods : sequence
        Indexable container with at least three entries:

        * ``likelihoods[0]`` -- mapping whose keys identify the cases; the
          iteration order of its keys defines each case's position.
        * ``likelihoods[1]`` -- sequence indexed by that position; the entry is
          passed to ``pscore`` as its first argument.
        * ``likelihoods[2]`` -- mapping keyed by the same case keys; element
          ``[3]`` of each value is passed to ``pscore`` as its second argument.

    Returns
    -------
    list
        ``pscore(...).compute()[0]`` for every case, in key order.
        NOTE(review): with the CRPS package this should be the plain CRPS
        value — confirm against the package documentation.
    """
    # `pscore` is the notebook-level alias created by `import CRPS.CRPS as pscore`.
    return [
        pscore(likelihoods[1][position], likelihoods[2][case_key][3]).compute()[0]
        for position, case_key in enumerate(likelihoods[0].keys())
    ]

In [4]:
# Run the evaluation of an ArtificialModel instance on the training log.
perfect_model = ArtificialModel()
evaluator = conduct_evaluation.ConductEvaluation(
    perfect_model,
    SampleOutcomes_DRBART_Normal_A_R_S_RC,
    {
        'activity_key': 'concept:name',
        'resource_key': 'org:resource',
        'known_resources': known_resources,
    },
    train_event_log,
    n=N,
    n_processes=n_processes,
)
# NOTE(review): the meaning of the two positional flags is not visible in this
# notebook — see ConductEvaluation.sample_cases.
likelihoods_train = evaluator.sample_cases(False, True)

100%|██████████| 1444/1444 [00:01<00:00, 998.11it/s]
100%|██████████| 1444/1444 [22:23<00:00,  1.07it/s]   
100%|██████████| 1444/1444 [10:39<00:00,  2.26it/s]  


In [5]:
# Mean of the natural logarithms of the per-case values in likelihoods_train[0].
np.mean([case_value.ln() for case_value in likelihoods_train[0].values()])

Decimal('0.5006857750341947878609485893')

In [6]:
# Average of the per-case scores returned by get_pscores for the training run.
np.mean(get_pscores(likelihoods_train))

np.float64(7946.005332185702)

In [7]:
# Read the pickled test event log.
# NOTE(review): pickle.load can execute arbitrary code — only open trusted files.
with open('../../../data/AR/transformed_event_logs/AR_test.pickle', 'rb') as test_file:
    test_event_log = pickle.load(test_file)

In [8]:
# Rename three columns of the test log. The result is bound back to the same
# name instead of using inplace=True, so the loaded frame is not mutated.
# NOTE(review): presumably these are the column names the evaluation code
# expects — confirm. Unlike the training log, 'case:concept:name' is not cast
# to str here; verify that this difference is intended.
test_event_log = test_event_log.rename(columns={
    'time_timestamp_start': 'time:timestamp_start',
    'time_timestamp_complete': 'time:timestamp_complete',
    'seconds_since_midnight': 'seconds_in_day',
})

test_event_log

Unnamed: 0,concept:name,lifecycle:transition_start,time:timestamp_start,org:resource,case:concept:name,lifecycle:transition_complete,time:timestamp_complete,duration,duration_seconds,seconds_in_day,day_of_week,1,Clark,Jane,Joe,Karsten,DIAGNOSIS,QUALITY_CONTROL,REPAIR
2,DIAGNOSIS,START,2020-01-01 03:57:40.044121+00:00,Jane,0,COMPLETE,2020-01-01 04:34:23.549454+00:00,0 days 00:36:43.505333,2203,14260,2,0,0,1,0,0,1,0,0
7,REPAIR,START,2020-01-01 04:34:23.549454+00:00,Joe,0,COMPLETE,2020-01-01 14:30:27.423999+00:00,0 days 09:56:03.874545,35763,16463,2,0,0,1,1,0,1,0,1
12,QUALITY_CONTROL,START,2020-01-01 14:30:27.423999+00:00,Joe,0,COMPLETE,2020-01-01 22:13:45.345445+00:00,0 days 07:43:17.921446,27797,52227,2,0,0,1,2,0,1,1,1
17,DIAGNOSIS,START,2020-01-01 08:16:35.844753+00:00,Jane,1,COMPLETE,2020-01-01 08:47:14.772217+00:00,0 days 00:30:38.927464,1838,29795,2,0,0,1,0,0,1,0,0
22,REPAIR,START,2020-01-01 08:47:14.772217+00:00,Karsten,1,COMPLETE,2020-01-01 13:27:22.316694+00:00,0 days 04:40:07.544477,16807,31634,2,0,0,1,0,1,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26997,QUALITY_CONTROL,START,2024-12-26 19:03:41.622915+00:00,Joe,1799,COMPLETE,2024-12-26 21:39:17.312456+00:00,0 days 02:35:35.689541,9335,68621,3,1,0,0,1,1,1,1,1
27002,DIAGNOSIS,START,2024-12-26 18:05:59.540931+00:00,Clark,1800,COMPLETE,2024-12-26 19:13:26.892809+00:00,0 days 01:07:27.351878,4047,65159,3,0,1,0,0,0,1,0,0
27007,REPAIR,START,2024-12-26 19:13:26.892809+00:00,Clark,1800,COMPLETE,2024-12-27 08:23:46.946760+00:00,0 days 13:10:20.053951,47420,69206,3,0,2,0,0,0,1,0,1
27012,QUALITY_CONTROL,START,2024-12-27 08:23:46.946760+00:00,Jane,1800,COMPLETE,2024-12-27 11:10:44.009123+00:00,0 days 02:46:57.062363,10017,30226,4,0,2,1,0,0,1,1,1


In [9]:
# Run the same evaluation as for the training log, this time on the test log.
# NOTE(review): the recorded progress bars show 1444 items for both the train
# and the test run — confirm the test pickle is a distinct hold-out set.
perfect_model = ArtificialModel()
evaluator = conduct_evaluation.ConductEvaluation(
    perfect_model,
    SampleOutcomes_DRBART_Normal_A_R_S_RC,
    {
        'activity_key': 'concept:name',
        'resource_key': 'org:resource',
        'known_resources': known_resources,
    },
    test_event_log,
    n=N,
    n_processes=n_processes,
)
# NOTE(review): the meaning of the two positional flags is not visible in this
# notebook — see ConductEvaluation.sample_cases.
likelihoods_test = evaluator.sample_cases(False, True)

100%|██████████| 1444/1444 [00:01<00:00, 878.34it/s]
100%|██████████| 1444/1444 [26:47<00:00,  1.11s/it]   
100%|██████████| 1444/1444 [11:52<00:00,  2.03it/s]  


In [10]:
# Mean of the natural logarithms of the per-case values in likelihoods_test[0].
np.mean([case_value.ln() for case_value in likelihoods_test[0].values()])

Decimal('0.5029957726212640181525308880')

In [11]:
# Average of the per-case scores returned by get_pscores for the test run.
np.mean(get_pscores(likelihoods_test))

np.float64(7942.085152689373)