# CAUSAL INFORMED PREDICTIVE MONITORING
---
## Dataset: BPI Challenge 2015_1

imports

In [None]:
import pandas as pd
import missingno as msno
import causality
import elp

load the data


In [None]:
# Read the raw BPI Challenge 2015_1 event log (comma-separated, Latin-1 encoded).
log = pd.read_csv("../data/bpi2015_1.csv", sep=",", encoding="iso-8859-1")

In [None]:
# Preview the first rows of the freshly loaded DataFrame as a sanity check.
log.head()

In [None]:
# Summary statistics of the raw log, taken before missing values are filled.
log.describe()

In [None]:
# Visualise where values are missing in the raw log (missingno matrix plot).
msno.matrix(log)

In [None]:
# Replace every missing value, in every column, with the string "UNKNOWN".
# The DataFrame is modified in place.
# NOTE(review): the fill value is a string, so numeric columns that contained
# NaN presumably end up as object dtype and will then be picked up by the
# later select_dtypes(include="object") step — confirm this is intended.
log.fillna("UNKNOWN", inplace=True)

In [None]:
# Re-plot the missingness matrix to verify that no gaps remain after the fill.
msno.matrix(log)

In [None]:
# Rename the case-identifier and completion-time columns to the names
# used by the rest of the notebook ("id" and "timestamp"), in place.
log.rename(
    columns={
        "case": "id",
        "completeTime": "timestamp",
    },
    inplace=True,
)

In [None]:
# Display the DataFrame to inspect it after the column rename.
log

In [None]:
# Summary statistics of the "id" column.
log["id"].describe()

In [None]:
# Inspect the "timestamp" column (values and dtype) before building the event log.
log["timestamp"]

preprocessing and encoding

In [None]:
# Wrap the DataFrame in an elp.EventLog, passing "id" and "timestamp"
# (presumably the case-identifier and timestamp column names; later cells read
# them back as log.id_column / log.timestamp_column — confirm against elp).
# `log` is rebound here: from now on the tabular data is reached via log.df.
log = elp.EventLog(log, "id", "timestamp")

In [None]:
# List the columns of the table held by the event log.
log.df.columns

In [None]:
# Preview the table held by the event log.
log.df.head()

In [None]:
# Start from every object-dtype column, then take out the ones that must not
# be label-encoded. `list.remove` raises ValueError if a name is absent, which
# doubles as a check that the expected columns exist.
colums_to_encode = list(log.df.select_dtypes(include="object").columns)
for excluded in (
    "Responsible_actor",
    "landRegisterID",
    "SUMleges",
    "IDofConceptCase",
):
    colums_to_encode.remove(excluded)
print(colums_to_encode)

# The return value is not used; presumably label_encode rewrites the listed
# columns of log.df in place — confirm against the causality module.
causality.label_encode(log.df, colums_to_encode)

log.df.head()

In [None]:
import elp.encoders as en

# Columns handed to the "keep" transformer (presumably passed through
# unchanged — confirm against elp.encoders).
kept_columns = [
    "termName",
    "startDate",
    "caseProcedure",
    "caseStatus",
    "Includes_subCases",
    "endDatePlanned",
    "endDate",
    "parts",
    "requestComplete",
    "last_phase",
    "case_type",
    "event",
    "activityNameEN",
    "action_code",
    "activityNameNL",
    "planned",
    "question",
    "monitoringResource",
]

# Features requested from the timestamp transformer.
timestamp_features = [
    "event_order",
    "time_from_start",
    "time_from_midnight",
    "total_time",
    "elapsed_time_from_event",
]

timestamp_transformer = en.TimestampFeatures(
    log.id_column,
    timestamp_features,
    unit="1h",
)

# Each transformer entry is (name, transformer, columns it applies to).
encoder = en.LogEncoder(
    transformers=[
        ("drop", "drop", ["id"]),
        ("keep", "keep", kept_columns),
        ("timestamp", timestamp_transformer, [log.timestamp_column]),
    ]
)

# Presumably reports log columns that no transformer above covers — confirm.
encoder.check_unused(log)

In [None]:
# Fit the encoder on the event log and produce the encoded dataset that all
# following experiments and the causal discovery step operate on.
dataset = encoder.fit_transform(log)

In [None]:
# Display the encoded dataset.
dataset

run base experiment

In [None]:
# Baseline run: no `columns` argument is passed, so (presumably) every column
# of the dataset is used to predict the target — confirm against
# causality.run_experiment.
results_base = causality.run_experiment(
    dataset,
    target="elapsed_time_from_event_timestamp",
    name="BPI2015",
    experiment="base",
)

causal inference and causally informed prediction

In [None]:
# Build the causal graph over the encoded dataset (by its name, presumably via
# the FCI algorithm — confirm against the causality module). "BPI2015" is the
# same run name passed to the experiments.
graph = causality.causal_inference_fci(dataset, "BPI2015")

In [None]:
# Positional index of the target column; get_parents is called with this
# index rather than with the column name.
target_index = dataset.columns.get_loc("elapsed_time_from_event_timestamp")

# Indices of the target's parents in the causal graph.
parents_index = causality.get_parents(graph, target_index)

# Translate the indices back into column names.
parents = dataset.columns[parents_index].tolist()
parents

run the experiment with only the first-order parents

In [None]:
# Restrict the data to the first-order parents plus the target itself.
columns = [*parents, "elapsed_time_from_event_timestamp"]

results_1_order_parents = causality.run_experiment(
    dataset,
    target=columns[-1],
    name="BPI2015",
    experiment="1_order_parents",
    columns=columns,
)

run the experiment without the first-order parents

In [None]:
# Ablation: every dataset column except the first-order parents of the target.
# The target column itself stays in the selection as long as it is not listed
# among its own parents.
# Build a real list rather than a lazy `filter` iterator: an iterator can be
# consumed only once, so it would silently appear empty if run_experiment
# iterates or indexes with `columns` more than once. A list also matches what
# the other experiment cells pass.
columns = [name for name in dataset.columns.tolist() if name not in parents]

results_no_parents = causality.run_experiment(
    dataset,
    target="elapsed_time_from_event_timestamp",
    name="BPI2015",
    experiment="no_parents",
    columns=columns,
)

run the experiment with the second-order parents

In [None]:
# Same parent lookup as before, but with depth=2 (by the name, presumably the
# parents and the parents' parents — confirm against causality.get_parents).
parents_2_order_index = causality.get_parents(graph, target_index, depth=2)

# Translate the indices back into column names.
parents_2_order = dataset.columns[parents_2_order_index].tolist()
parents_2_order

In [None]:
# Restrict the data to the depth-2 parents plus the target itself.
columns = [*parents_2_order, "elapsed_time_from_event_timestamp"]

results_2_order_parents = causality.run_experiment(
    dataset,
    target=columns[-1],
    name="BPI2015",
    experiment="2_order_parents",
    columns=columns,
)