# CAUSAL INFORMED PREDICTIVE MONITORING
---
## Dataset: Traffic

imports

In [None]:
import pandas as pd
import missingno as msno
import causality
import elp

load the data


In [None]:
# Load the raw traffic-fines event log; the CSV is semicolon-separated and
# read with the ISO-8859-1 (Latin-1) encoding.
log = pd.read_csv("../data/traffic_fines_1.csv", sep=";", encoding="iso-8859-1")

In [None]:
# Preview the first rows of the raw log.
log.head()

In [None]:
# Summary statistics of the raw log.
log.describe()

In [None]:
# Plot the missing-value matrix of the raw log with missingno.
msno.matrix(log)

In [None]:
# Shorten the case-identifier and timestamp column names; the rest of the
# notebook refers to them as "id" and "timestamp".
column_aliases = {"Case ID": "id", "Complete Timestamp": "timestamp"}
log.rename(columns=column_aliases, inplace=True)
log.head()

In [None]:
# Summary of the case-identifier column (count, number of distinct values, ...).
log["id"].describe()

In [None]:
# Parse the timestamp column into pandas datetimes so that the `.dt`
# accessor can be used on it afterwards.
log["timestamp"] = pd.to_datetime(log["timestamp"])

In [None]:
selected_year = 2006

# One boolean mask marks the events recorded in the selected year; its
# negation selects every other event.
in_selected_year = log["timestamp"].dt.year == selected_year

log_2006 = log[in_selected_year]

log_other = log[~in_selected_year]

In [None]:
# Keep only the cases that lie entirely inside the selected year: every case
# id that also has events in another year is removed from the 2006 subset.
ids = log_2006["id"].unique()
other_ids = log_other["id"].unique()

# A single vectorised membership mask replaces a per-id drop loop, which
# rescanned the whole frame (and the whole id array) once per case.
spans_other_years = log_2006["id"].isin(other_ids)
log_2006 = log_2006[~spans_other_years]

# Rebind `log` to the filtered subset with a fresh 0..n-1 index.
log = log_2006.reset_index(drop=True)
log

Preprocessing and encoding

In [None]:
# Wrap the DataFrame in an elp.EventLog; "id" and "timestamp" are presumably
# the case-id and timestamp column names (exposed later as `log.id_column`
# and `log.timestamp_column`) -- confirm against the elp API.
log = elp.EventLog(log, "id", "timestamp")

In [None]:
# List the columns of the DataFrame held by the event log.
log.df.columns

In [None]:
# Preview the first rows of the DataFrame held by the event log.
log.df.head()

In [None]:
# Start from every object-dtype column, then take out the ones that are not
# label-encoded here: the case id, and "article" / "Resource", which are
# converted to floats separately.
object_columns = log.df.select_dtypes(include="object").columns
colums_to_encode = list(object_columns)
for excluded in ("id", "article", "Resource"):
    # list.remove raises ValueError if the column is not object-typed.
    colums_to_encode.remove(excluded)

print(colums_to_encode)

# NOTE(review): the return value is discarded, so label_encode presumably
# modifies log.df in place -- confirm against the causality module.
causality.label_encode(log.df, colums_to_encode)

log.df.head()

In [None]:
# "article" and "Resource" are cast to float after the "other" placeholder
# is mapped to 0.0 (presumably every remaining value is numeric -- astype
# would raise otherwise).
#
# The result is assigned back explicitly instead of calling
# `replace(..., inplace=True)` on the column selection: that chained in-place
# form emits a FutureWarning on pandas 2.2 and does not update the parent
# frame under Copy-on-Write.
for column in ("article", "Resource"):
    log.df[column] = log.df[column].replace("other", 0.0).astype(float)

In [None]:
import elp.encoders as en

# Columns handed to the "keep" transformer (presumably passed through to the
# encoded dataset unchanged -- confirm against elp.encoders).
keep_columns = [
    "article",
    "vehicleClass",
    "amount",
    "points",
    "Activity",
    "Resource",
    "lastSent",
    "notificationType",
    "dismissal",
    "expense",
    "month",
    "weekday",
    "hour",
    "open_cases",
    "label",
]

# Features requested from the timestamp column, with unit "1h".
timestamp_feature_names = [
    "event_order",
    "time_from_start",
    "time_from_midnight",
    "total_time",
    "elapsed_time_from_event",
]
timestamp_transformer = en.TimestampFeatures(
    log.id_column,
    timestamp_feature_names,
    unit="1h",
)

# Each transformer entry is (name, transformer, columns it applies to).
encoder = en.LogEncoder(
    transformers=[
        ("drop", "drop", ["id"]),
        ("keep", "keep", keep_columns),
        ("timestamp", timestamp_transformer, [log.timestamp_column]),
    ]
)

# NOTE(review): presumably reports log columns not assigned to any
# transformer -- confirm against elp.encoders.
encoder.check_unused(log)

In [None]:
# Fit the encoder on the event log and produce the encoded dataset.
dataset = encoder.fit_transform(log)

In [None]:
# Display the encoded dataset.
dataset

run base experiment

In [None]:
# Baseline experiment: no column subset is passed to run_experiment.
base_settings = {
    "target": "elapsed_time_from_event_timestamp",
    "name": "Traffic",
    "experiment": "base",
}
results_base = causality.run_experiment(dataset, **base_settings)

Causal inference and causally informed prediction

In [None]:
# Run the project's FCI-based causal inference on the encoded dataset;
# "Traffic" is presumably the dataset name used for labelling the output.
graph = causality.causal_inference_fci(dataset, "Traffic")

In [None]:
# Positional index of the prediction target among the dataset columns.
target_index = dataset.columns.get_loc("elapsed_time_from_event_timestamp")

# Look up the target's parents in the causal graph (as column positions) and
# translate them back into column names.
parents_index = causality.get_parents(graph, target_index)
parent_columns = dataset.columns[parents_index]

parents = parent_columns.tolist()
parents

Run the experiment using only the first-order parents

In [None]:
# Restrict the experiment to the target's parents plus the target itself.
columns = [*parents, "elapsed_time_from_event_timestamp"]

first_order_settings = {
    "target": "elapsed_time_from_event_timestamp",
    "name": "Traffic",
    "experiment": "1_order_parents",
    "columns": columns,
}
results_1_order_parents = causality.run_experiment(dataset, **first_order_settings)

Run the experiment without the parents

In [None]:
# Every dataset column except the target's parents. The target column itself
# is kept as long as it is not listed among its own parents.
#
# Built as a concrete list, like the `columns` argument of the other
# experiments: a lazy `filter` object can be traversed only once and cannot
# be indexed or measured with len().
columns = [column for column in dataset.columns.tolist() if column not in parents]

results_no_parents = causality.run_experiment(
    dataset,
    target="elapsed_time_from_event_timestamp",
    name="Traffic",
    experiment="no_parents",
    columns=columns,
)

Run the experiment using the second-order parents

In [None]:
# Same lookup as for the direct parents, but with depth=2, then map the
# returned column positions back to column names.
parents_2_order_index = causality.get_parents(graph, target_index, depth=2)
second_order_columns = dataset.columns[parents_2_order_index]

parents_2_order = second_order_columns.tolist()
parents_2_order

In [None]:
# Restrict the experiment to the depth-2 parents plus the target itself.
columns = [*parents_2_order, "elapsed_time_from_event_timestamp"]

second_order_settings = {
    "target": "elapsed_time_from_event_timestamp",
    "name": "Traffic",
    "experiment": "2_order_parents",
    "columns": columns,
}
results_2_order_parents = causality.run_experiment(dataset, **second_order_settings)