# CAUSAL INFORMED PREDICTIVE MONITORING
---
## Dataset: BPI Challenge 2013

### imports

In [None]:
import pandas as pd
import missingno as msno
import causality
import elp
import os
import json

### load the data


In [None]:
# Load the raw incident log. The CSV is semicolon-delimited and stored in
# ISO-8859-1 (Latin-1), so both must be given explicitly.
csv_path = "../data/BPI_Challenge_2013_incident.csv"
log = pd.read_csv(csv_path, sep=";", encoding="iso-8859-1")

In [None]:
# Preview the first rows of the freshly loaded log.
log.head()

In [None]:
# Summary statistics of the raw log (pandas `describe` with its defaults).
log.describe()

In [None]:
# Plot the missingno nullity matrix to see which columns contain missing values.
msno.matrix(log)

In [None]:
# Replace missing "Involved ST Function Div" values with an explicit "UNKNOWN"
# category.
# The filled Series is assigned back to the column instead of calling
# `fillna(..., inplace=True)` on `log[...]`: that chained in-place call acts on
# a selected column object, triggers a FutureWarning in pandas 2.2, and under
# Copy-on-Write (the default from pandas 3.0) leaves `log` unchanged.
log["Involved ST Function Div"] = log["Involved ST Function Div"].fillna("UNKNOWN")

In [None]:
# Re-plot the nullity matrix to check the log after the fill step.
msno.matrix(log)

In [None]:
# Give the case-identifier and event-time columns the short names ("id",
# "timestamp") that the following cells refer to.
column_aliases = {"SR Number": "id", "Change Date+Time": "timestamp"}
log.rename(columns=column_aliases, inplace=True)

In [None]:
# Display the frame to confirm the renamed columns.
log

In [None]:
# Summarise the case-identifier column (originally "SR Number").
log["id"].describe()

In [None]:
# Inspect the raw timestamp column (originally "Change Date+Time").
log["timestamp"]

### preprocessing and encoding

In [None]:
# Wrap the DataFrame in an `elp.EventLog`, passing "id" and "timestamp" --
# presumably the case-id and timestamp column names; confirm against the
# `elp.EventLog` signature.
# NOTE: this rebinds `log` from a DataFrame to an EventLog; later cells reach
# the underlying frame through `log.df`.
log = elp.EventLog(log, "id", "timestamp")

In [None]:
# List the columns of the frame held by the EventLog.
log.df.columns

In [None]:
# Preview the first rows of the frame held by the EventLog.
log.df.head()

In [None]:
# Collect every object-dtype column as a candidate for label encoding.
object_columns = log.df.select_dtypes(include="object").columns
colums_to_encode = object_columns.tolist()

# The case identifier must stay as-is, so take it out of the list
# (raises ValueError if "id" is not an object column).
colums_to_encode.remove("id")
print(colums_to_encode)

# The return value is discarded, so `label_encode` presumably encodes the
# listed columns of `log.df` in place -- TODO confirm in `causality`.
causality.label_encode(log.df, colums_to_encode)

# Show the frame after encoding.
log.df.head()

In [None]:
import elp.encoders as en

# Columns handed to the "keep" transformer (presumably passed through
# unchanged -- confirm against `elp.encoders.LogEncoder`).
kept_columns = [
    "Involved ST Function Div",
    "Involved Org line 3",
    "Involved ST",
    "Status",
    "Sub Status",
    "Owner Country",
    "Owner First Name",
    "SR Latest Impact",
    "Product",
    "Country",
]

# Features requested from the timestamp column. Later cells refer to them with
# a "_timestamp" suffix (e.g. "elapsed_time_from_event_timestamp").
timestamp_features = [
    "event_order",
    "time_from_start",
    "time_from_midnight",
    "total_time",
    "elapsed_time_from_event",
]
timestamp_transformer = en.TimestampFeatures(
    log.id_column,
    timestamp_features,
    unit="1h",  # presumably the time unit of the derived durations -- TODO confirm
)

# Each entry is (name, transformer, columns).
encoder = en.LogEncoder(
    transformers=[
        ("drop", "drop", ["id"]),
        ("keep", "keep", kept_columns),
        ("timestamp", timestamp_transformer, [log.timestamp_column]),
    ]
)

# Presumably reports the log columns not covered by any transformer above.
encoder.check_unused(log)

In [None]:
# Fit the encoder on the event log and produce the encoded dataset used by
# all experiments below.
dataset = encoder.fit_transform(log)

In [None]:
# Display the encoded dataset.
dataset

### run base experiment

In [None]:
# Baseline run: no `columns` restriction is passed, so the experiment uses
# whatever `run_experiment` selects by default (presumably every column).
target_column = "elapsed_time_from_event_timestamp"
results_base = causality.run_experiment(
    dataset, target=target_column, name="BPI2013", experiment="base"
)

### causal inference and causal informed prediction

In [None]:
# Run the project's FCI-based causal inference on the encoded dataset;
# "BPI2013" is presumably a label for the run/output -- confirm in `causality`.
graph = causality.causal_inference_fci(dataset, "BPI2013")

In [None]:
# Hand-entered parent sets for the prediction target. They were presumably read
# off the causal graph produced in the previous cell -- re-check them whenever
# the graph changes.

# Direct (first-order) parents of the target.
parents = ["time_from_start_timestamp", "Involved ST", "Sub Status"]

# For each direct parent, its own parents (second-order parents of the target).
parents_of_parents = {
    "time_from_start_timestamp": [
        "event_order_timestamp",
        "total_time_timestamp",
        "Owner Country",
    ],
    "Involved ST": [
        "Involved ST Function Div",
        "Involved Org line 3",
        "SR Latest Impact",
    ],
    "Sub Status": ["Status", "time_from_midnight_timestamp"],
}

# Flat list: the direct parents first (dict key order), then every second-order
# parent in the order of its group.
parents_2_order = list(parents_of_parents)
for grandparents in parents_of_parents.values():
    parents_2_order.extend(grandparents)

### run first-order parents only experiment

In [None]:
# Restrict the experiment to the direct parents plus the target itself.
target_column = "elapsed_time_from_event_timestamp"
columns = [*parents, target_column]

results_1_order_parents = causality.run_experiment(
    dataset,
    target=target_column,
    name="BPI2013",
    experiment="1_order_parents",
    columns=columns,
)

### run without-parents experiment

In [None]:
# Ablation: keep every dataset column except the direct parents. The target is
# not in `parents`, so it stays in the selection.
# Built as a concrete list (not a lazy `filter` object) so it matches the other
# experiments and can be iterated more than once inside `run_experiment`.
columns = [column for column in dataset.columns.tolist() if column not in parents]

results_no_parents = causality.run_experiment(
    dataset,
    target="elapsed_time_from_event_timestamp",
    name="BPI2013",
    experiment="no_parents",
    columns=columns,
)

### run second-order parents experiment

In [None]:
# Restrict the experiment to the first- and second-order parents plus the
# target itself.
target_column = "elapsed_time_from_event_timestamp"
columns = [*parents_2_order, target_column]

results_2_order_parents = causality.run_experiment(
    dataset,
    target=target_column,
    name="BPI2013",
    experiment="2_order_parents",
    columns=columns,
)