In [None]:
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.objects.conversion.log import converter
import pandas as pd
from create_benchmarks import remainTimeOrClassifBenchmark, nextEventBenchmark

## BPIC_2012

In [None]:
# Settings for the BPI Challenge 2012 log; every value is passed on to the benchmark builders below.
NAME, PATH = "BPI_Challenge_2012", r"D:\Data\BPIC_2012"  # file stem of the .xes log and the folder holding it
START_DATE, END_DATE = None, "2012-02"  # presumably the date window to keep (None = no lower bound) -- confirm in create_benchmarks
MAX_DAYS, TEST_LEN_SHARE = 32.28, 0.2  # presumably a case-duration cap in days and the test-set share -- confirm

In [None]:
# Outcome label -> event names ("<concept:name>_<lifecycle:transition>") associated with it.
# Handed to remainTimeOrClassifBenchmark as its last argument.
KEYWORDS_DICT = {
    "approved": [
        "A_REGISTERED_COMPLETE",
        "A_APPROVED_COMPLETE",
        "O_ACCEPTED_COMPLETE",
        "A_ACTIVATED_COMPLETE",
    ],
    "declined": ["A_DECLINED_COMPLETE", "O_DECLINED_COMPLETE"],
    "canceled": ["A_CANCELLED_COMPLETE"],
}

In [None]:
# Read <PATH>/<NAME>.xes and convert the event log with the TO_DATA_FRAME variant.
log = xes_importer.apply(f"{PATH}/{NAME}.xes")
dataset = converter.apply(log, variant=converter.Variants.TO_DATA_FRAME)

In [None]:
# remaining time and classification
# The classification label is "<concept:name>_<lifecycle:transition>", the same
# format used by the entries of KEYWORDS_DICT.
dataset["classif_target"] = (
    dataset["concept:name"] + "_" + dataset["lifecycle:transition"]
)
remainTimeOrClassifBenchmark(
    dataset, PATH, NAME,
    START_DATE, END_DATE, MAX_DAYS, TEST_LEN_SHARE,
    "xes", KEYWORDS_DICT,
)

In [None]:
# next event
# "activity" joins activity name and lifecycle transition; the column name is
# passed to nextEventBenchmark as its last argument.
dataset["activity"] = (
    dataset["concept:name"] + "_" + dataset["lifecycle:transition"]
)
nextEventBenchmark(
    dataset, PATH, NAME,
    START_DATE, END_DATE, MAX_DAYS, TEST_LEN_SHARE,
    "xes", "activity",
)

## BPIC_2015

In [None]:
# Settings for the BPI Challenge 2015 logs; NAME is only a prefix, the municipality number is appended when loading.
NAME, PATH = "BPIC15_", r"D:\Data\BPIC_2015"
START_DATE, END_DATE = "2010-10", "2015-03"  # presumably the date window to keep -- confirm in create_benchmarks
MAX_DAYS, TEST_LEN_SHARE = 302.82, 0.2  # presumably a case-duration cap in days and the test-set share -- confirm

In [None]:
# Load the five municipality logs (BPIC15_1.xes ... BPIC15_5.xes) and stack them into one frame.
municipality_frames = []
for munic_nr in range(1, 6):
    log = xes_importer.apply(PATH + "/" + NAME + str(munic_nr) + ".xes")
    df = converter.apply(log, variant=converter.Variants.TO_DATA_FRAME)
    df["municipality"] = munic_nr
    # create unique case numbers: shift the numeric case id by two digits and
    # store the municipality number (1-5) in the freed digits
    df['case:concept:name'] = df['case:concept:name'].astype("int") * 100 + munic_nr
    municipality_frames.append(df)
# Concatenate once at the end: growing the frame with pd.concat inside the loop
# re-copies all previously loaded rows on every iteration.
# NOTE(review): as with the previous nested concat, each frame keeps its own row
# labels, so labels may repeat across municipalities; ignore_index=True was
# deliberately not added -- confirm what create_benchmarks expects.
dataset = pd.concat(municipality_frames)
print("2015 total len:", len(dataset))
# remaining time
remainTimeOrClassifBenchmark(dataset, PATH, NAME, START_DATE, END_DATE, MAX_DAYS, TEST_LEN_SHARE)

## BPIC_2017
(benchmark output is saved using pickle instead of xes)

In [None]:
# Settings for the BPI Challenge 2017 log; every value is passed on to the benchmark builders below.
NAME, PATH = "BPI Challenge 2017", r"D:\Data\BPIC_2017"  # file stem of the .xes log and the folder holding it
START_DATE, END_DATE = None, "2017-01"  # presumably the date window to keep (None = no lower bound) -- confirm in create_benchmarks
MAX_DAYS, TEST_LEN_SHARE = 47.81, 0.2  # presumably a case-duration cap in days and the test-set share -- confirm
OUTPUT_TYPE = "pickle"  # passed where the 2012 section passes "xes"

In [None]:
# Outcome label -> activity names (plain "concept:name", no lifecycle suffix) associated with it.
# Handed to remainTimeOrClassifBenchmark as its last argument.
KEYWORDS_DICT = {
    "approved": ["O_Accepted"],
    "declined": ["O_Refused"],
    "canceled": ["O_Cancelled"],
}

In [None]:
# Read <PATH>/<NAME>.xes and convert the event log with the TO_DATA_FRAME variant.
log = xes_importer.apply(f"{PATH}/{NAME}.xes")
dataset = converter.apply(log, variant=converter.Variants.TO_DATA_FRAME)

In [None]:
# remaining time and classification
# Here the label is the bare activity name (no lifecycle suffix), which is the
# format of the KEYWORDS_DICT entries for this log.
dataset["classif_target"] = dataset["concept:name"]
remainTimeOrClassifBenchmark(
    dataset, PATH, NAME,
    START_DATE, END_DATE, MAX_DAYS, TEST_LEN_SHARE,
    OUTPUT_TYPE, KEYWORDS_DICT,
)

In [None]:
# next event
# "activity" joins activity name and lifecycle transition; the column name is
# passed to nextEventBenchmark as its last argument.
dataset["activity"] = (
    dataset["concept:name"] + "_" + dataset["lifecycle:transition"]
)
nextEventBenchmark(
    dataset, PATH, NAME,
    START_DATE, END_DATE, MAX_DAYS, TEST_LEN_SHARE,
    OUTPUT_TYPE, "activity",
)

## BPIC_2019

In [None]:
# Settings for the BPI Challenge 2019 log; every value is passed on to the benchmark builder below.
NAME, PATH = "BPI_Challenge_2019", r"D:\Data\BPIC_2019"  # file stem of the .xes log and the folder holding it
START_DATE, END_DATE = "2018-01", "2019-02"  # presumably the date window to keep -- confirm in create_benchmarks
MAX_DAYS, TEST_LEN_SHARE = 143.33, 0.2  # presumably a case-duration cap in days and the test-set share -- confirm

In [None]:
# Read <PATH>/<NAME>.xes and convert the event log with the TO_DATA_FRAME variant.
log = xes_importer.apply(f"{PATH}/{NAME}.xes")
dataset = converter.apply(log, variant=converter.Variants.TO_DATA_FRAME)
# remaining time
# Output type and keyword dictionary are not passed here, so the callee's defaults apply.
remainTimeOrClassifBenchmark(
    dataset, PATH, NAME,
    START_DATE, END_DATE, MAX_DAYS, TEST_LEN_SHARE,
)

## BPIC_2020 PAYMENTS

In [None]:
# Settings for the BPIC 2020 "RequestForPayment" log; every value is passed on to the benchmark builder below.
NAME, PATH = "RequestForPayment", r"D:\Data\BPIC_2020"  # file stem of the .xes log and the folder holding it
START_DATE, END_DATE = None, "2018-12"  # presumably the date window to keep (None = no lower bound) -- confirm in create_benchmarks
MAX_DAYS, TEST_LEN_SHARE = 28.86, 0.2  # presumably a case-duration cap in days and the test-set share -- confirm

In [None]:
# Read <PATH>/<NAME>.xes and convert the event log with the TO_DATA_FRAME variant.
log = xes_importer.apply(f"{PATH}/{NAME}.xes")
dataset = converter.apply(log, variant=converter.Variants.TO_DATA_FRAME)
# remaining time
# Output type and keyword dictionary are not passed here, so the callee's defaults apply.
remainTimeOrClassifBenchmark(
    dataset, PATH, NAME,
    START_DATE, END_DATE, MAX_DAYS, TEST_LEN_SHARE,
)

## BPIC_2020 PERMITS

In [None]:
# Settings for the BPIC 2020 "PermitLog" log; every value is passed on to the benchmark builder below.
NAME, PATH = "PermitLog", r"D:\Data\BPIC_2020"  # file stem of the .xes log and the folder holding it
START_DATE, END_DATE = None, "2019-10"  # presumably the date window to keep (None = no lower bound) -- confirm in create_benchmarks
MAX_DAYS, TEST_LEN_SHARE = 258.81, 0.2  # presumably a case-duration cap in days and the test-set share -- confirm

In [None]:
# Read <PATH>/<NAME>.xes and convert the event log with the TO_DATA_FRAME variant.
log = xes_importer.apply(f"{PATH}/{NAME}.xes")
dataset = converter.apply(log, variant=converter.Variants.TO_DATA_FRAME)
# remaining time
# Output type and keyword dictionary are not passed here, so the callee's defaults apply.
remainTimeOrClassifBenchmark(
    dataset, PATH, NAME,
    START_DATE, END_DATE, MAX_DAYS, TEST_LEN_SHARE,
)

## BPIC_2020 TRAVEL COSTS

In [None]:
# Settings for the BPIC 2020 "PrepaidTravelCost" log; every value is passed on to the benchmark builder below.
NAME, PATH = "PrepaidTravelCost", r"D:\Data\BPIC_2020"  # file stem of the .xes log and the folder holding it
START_DATE, END_DATE = None, "2019-01"  # presumably the date window to keep (None = no lower bound) -- confirm in create_benchmarks
MAX_DAYS, TEST_LEN_SHARE = 114.26, 0.2  # presumably a case-duration cap in days and the test-set share -- confirm

In [None]:
# Read <PATH>/<NAME>.xes and convert the event log with the TO_DATA_FRAME variant.
log = xes_importer.apply(f"{PATH}/{NAME}.xes")
dataset = converter.apply(log, variant=converter.Variants.TO_DATA_FRAME)
# remaining time
# Output type and keyword dictionary are not passed here, so the callee's defaults apply.
remainTimeOrClassifBenchmark(
    dataset, PATH, NAME,
    START_DATE, END_DATE, MAX_DAYS, TEST_LEN_SHARE,
)