In [1]:
import os
import json
import numpy as np
import pandas as pd

In [2]:
# Root directories of the result files: this project's runs and the baseline ("sota") runs.
my_results_path = "outputs"
sota_results_path = "sota_results"

# Baseline method names (sub-directories of sota_results_path) and the result
# groups of this project's method (top-level keys inside each distances.json).
methods_sota = ["simod", "dsim", "rims"]
my_methods = ["prob", "det"]
# Case-study identifiers; used as directory names and as file-name suffixes when loading.
casestudies = ["purchasing", "acr", "cvs", "bpi12", "bpi17"]
# Metric keys looked up in the distances.json files.
metrics = ["ngd", "ctd", "car"]
entropy_metrics = ["etd_entropy"]
# Metric keys looked up in the rule_distances_<case>.json files.
rule_metrics = ["rule_cfd", "rule_atd", "rule_etd", "rule_wtd"]
# Depths 0..5 are probed as "maxdepth_<d>" keys; best_depth is the depth reported in the tables.
depth_range = list(range(6))
best_depth = 5

In [3]:
def load_sota_results():
    """Load the per-case mean of every metric for each baseline method.

    For every method in ``methods_sota`` and case in ``casestudies`` this reads
    ``<sota_results_path>/<method>/<case>/distances.json`` (files that do not
    exist are skipped) and averages the values stored under each key of
    ``metrics + entropy_metrics``.

    Returns:
        dict: ``{method: {case: {metric: mean}}}``. Every method has an entry,
        possibly empty; a case appears only if its file exists. A metric that
        is absent from the file, or present but empty, maps to ``nan``.
    """
    wanted = metrics + entropy_metrics
    data = {}
    for method in methods_sota:
        per_case = data[method] = {}
        for case in casestudies:
            path = os.path.join(sota_results_path, method, case, "distances.json")
            if not os.path.exists(path):
                continue
            with open(path, "r") as f:
                results = json.load(f)
            for metric in wanted:
                vals = results.get(metric, [])
                # np.mean([]) would also give nan, but only after emitting
                # "Mean of empty slice" RuntimeWarnings. Handle missing data
                # explicitly, in line with load_my_results. np.size keeps
                # scalar and non-empty inputs on the np.mean path unchanged.
                mean = np.mean(vals) if np.size(vals) else np.nan
                per_case.setdefault(case, {})[metric] = mean
    return data

# Load the baseline results once; the table-building cells below read this module-level dict.
sota_data = load_sota_results()


In [4]:
def load_my_results():
    """Load the mean and std of every metric per method, case and depth.

    Reads ``<my_results_path>/<case>/distances.json`` for each case in
    ``casestudies`` (files that do not exist are skipped). Inside a file the
    values are looked up as ``results[method]["maxdepth_<d>"][metric]`` for
    every method in ``my_methods``, depth in ``depth_range`` and metric in
    ``metrics + entropy_metrics``.

    Returns:
        dict: ``{method: {case: {metric: {depth: (mean, std)}}}}``. Every
        method has an entry, possibly empty. A depth is recorded only when its
        ``maxdepth_<d>`` key exists; a metric that is missing or empty under an
        existing depth maps to ``(nan, nan)``. ``std`` is ``np.std`` with its
        default settings.
    """
    wanted = metrics + entropy_metrics
    data = {method: {} for method in my_methods}
    for case in casestudies:
        path = os.path.join(my_results_path, case, "distances.json")
        if not os.path.exists(path):
            continue
        # A single file holds the results of every method, so parse it once
        # per case instead of once per (method, case) pair.
        with open(path, "r") as f:
            results = json.load(f)
        for method in my_methods:
            by_depth = results.get(method, {})
            for depth in depth_range:
                key = f"maxdepth_{depth}"
                if key not in by_depth:
                    continue
                for metric in wanted:
                    vals = by_depth[key].get(metric, [])
                    mean = np.mean(vals) if vals else np.nan
                    std = np.std(vals) if vals else np.nan
                    data[method].setdefault(case, {}).setdefault(metric, {})[depth] = (mean, std)
    return data

# Load this project's results once; the table-building cells below read this module-level dict.
my_data = load_my_results()


In [5]:
def generate_comparison_table(depth):
    """Build the accuracy table of every baseline method against DBPS.

    One row is produced per (case study, metric) pair over ``casestudies`` x
    ``metrics``. Baseline columns are read from ``sota_data``; the ``DBPS``
    column is the mean stored in ``my_data["prob"]`` for ``depth``. Entries
    that cannot be found become ``nan``.

    Args:
        depth: Depth whose DBPS result is reported.

    Returns:
        tuple: ``(df, styled)`` - the frame indexed by
        ``["Case Study", "Metric"]`` and a Styler that marks each row's
        minimum in green and its maximum in red.
    """
    records = []
    for case in casestudies:
        for metric in metrics:
            record = {"Case Study": case, "Metric": metric}

            # Baselines: one column per method.
            for name in methods_sota:
                record[name] = sota_data.get(name, {}).get(case, {}).get(metric, np.nan)

            # Own method: keep element 0 (the mean) of the stored tuple.
            stored = my_data["prob"].get(case, {}).get(metric, {}).get(depth, (np.nan,))
            record["DBPS"] = stored[0]

            records.append(record)

    df = pd.DataFrame(records).set_index(["Case Study", "Metric"])

    # Row-wise highlighting: smallest value green, largest value red.
    styler = df.style.highlight_min(axis=1, color='green')
    styled = styler.highlight_max(axis=1, color='red')
    return df, styled

# Build the table at the configured depth; the trailing expression makes the Styler the cell's output.
df_min, styled_min = generate_comparison_table(best_depth)
print("Accuracy Comparison.")
styled_min


Accuracy Comparison.


Unnamed: 0_level_0,Unnamed: 1_level_0,simod,dsim,rims,DBPS
Case Study,Metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
purchasing,ngd,0.577218,0.596506,0.593391,0.351867
purchasing,ctd,453.621311,584.295082,578.331066,432.121311
purchasing,car,731.434426,770.538441,773.016897,669.17377
acr,ngd,0.534745,0.249688,0.231299,0.229476
acr,ctd,716.051309,70.190285,46.282942,190.077487
acr,car,250.33089,236.814151,233.661219,180.235602
cvs,ngd,0.581966,0.341521,0.384304,0.124729
cvs,ctd,269.9298,52.4299,99.2474,59.9043
cvs,car,5.3943,20.3667,20.2621,17.777
bpi12,ngd,0.794951,0.662948,0.631976,0.350644


In [6]:
# Average every column over the case studies, grouped by metric.
# NOTE: df_min is rebound by a later cell (the white-box table), so re-running
# this cell after that one averages the white-box frame instead.
print("Avg Accuracy Comparison.")
df_min.groupby("Metric").mean().style.highlight_min(axis=1, color='green')

Avg Accuracy Comparison.


Unnamed: 0_level_0,simod,dsim,rims,DBPS
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
car,232.371577,229.907729,240.393601,200.123127
ctd,356.063472,194.920405,170.534851,153.829981
ngd,0.660645,0.486957,0.484525,0.275177


In [7]:
def generate_comparison_table(depth):
    """Build the accuracy table of the ``simod*`` baselines against DBPS.

    NOTE: this rebinds ``generate_comparison_table``; the earlier definition
    of the same name (which includes every baseline) is shadowed from this
    cell onwards.

    One row is produced per (case study, metric) pair over ``casestudies`` x
    ``metrics``. Only baselines whose name starts with ``"simod"`` get a
    column; the ``DBPS`` column is the mean stored in ``my_data["prob"]`` for
    ``depth``. Entries that cannot be found become ``nan``.

    Args:
        depth: Depth whose DBPS result is reported.

    Returns:
        tuple: ``(df, styled)`` - the frame indexed by
        ``["Case Study", "Metric"]`` and a Styler that marks each row's
        minimum in green.
    """
    records = []
    for case in casestudies:
        for metric in metrics:
            record = {"Case Study": case, "Metric": metric}

            # Baselines, restricted to names beginning with "simod".
            for name in methods_sota:
                if not name.startswith("simod"):
                    continue
                record[name] = sota_data.get(name, {}).get(case, {}).get(metric, np.nan)

            # Own method: keep element 0 (the mean) of the stored tuple.
            stored = my_data["prob"].get(case, {}).get(metric, {}).get(depth, (np.nan,))
            record["DBPS"] = stored[0]

            records.append(record)

    df = pd.DataFrame(records).set_index(["Case Study", "Metric"])

    # Row-wise highlighting of the smallest value.
    styled = df.style.highlight_min(axis=1, color='green')
    return df, styled

# Rebuild the table with the simod-only variant defined in this cell.
# This overwrites the df_min / styled_min produced by the earlier accuracy cell.
df_min, styled_min = generate_comparison_table(best_depth)
print("Accuracy Comparison (White-Box).")
styled_min


Accuracy Comparison (White-Box).


Unnamed: 0_level_0,Unnamed: 1_level_0,simod,DBPS
Case Study,Metric,Unnamed: 2_level_1,Unnamed: 3_level_1
purchasing,ngd,0.577218,0.351867
purchasing,ctd,453.621311,432.121311
purchasing,car,731.434426,669.17377
acr,ngd,0.534745,0.229476
acr,ctd,716.051309,190.077487
acr,car,250.33089,180.235602
cvs,ngd,0.581966,0.124729
cvs,ctd,269.9298,59.9043
cvs,car,5.3943,17.777
bpi12,ngd,0.794951,0.350644


In [8]:
# Keys read from each rule_distances_<case>.json entry.
# NOTE(review): 'maxdepth_5' is hard-coded rather than derived from best_depth;
# confirm the two are meant to stay in sync before changing either.
methods = ['simod', 'maxdepth_5']

rows = []

for case in casestudies:
    filename = f'{my_results_path}/rule_distances_{case}.json'
    if not os.path.exists(filename):
        print(f"Warning: File {filename} not found")
        continue
    # Parse first, then release the file handle before building rows.
    with open(filename) as f:
        data = json.load(f)
    for metric in rule_metrics:
        per_method = data.get(metric, {})
        row = {'Case Study': case, 'Metric': metric}
        for method in methods:
            # Missing entries stay None so the column still exists.
            row[method] = per_method.get(method, None)
        rows.append(row)

# Explicit columns keep the frame well-formed when no file was found: without
# them an empty `rows` yields a column-less frame and set_index raises KeyError.
df = (
    pd.DataFrame(rows, columns=["Case Study", "Metric", *methods])
    .rename(columns={"maxdepth_5": "DBPS"})
    .set_index(["Case Study", "Metric"])
)

# Row-wise highlighting of the smallest value.
styled = df.style.highlight_min(axis=1, color='green')
print("Rule Accuracy Comparison (White-Box).")
styled

Rule Accuracy Comparison (White-Box).


Unnamed: 0_level_0,Unnamed: 1_level_0,simod,DBPS
Case Study,Metric,Unnamed: 2_level_1,Unnamed: 3_level_1
purchasing,rule_cfd,0.15926,0.05218
purchasing,rule_atd,768.306467,786.056626
purchasing,rule_etd,128.824274,24.758365
purchasing,rule_wtd,1525.204841,1706.516013
acr,rule_cfd,0.280739,0.129608
acr,rule_atd,136.765912,81.426439
acr,rule_etd,6.217943,10.543097
acr,rule_wtd,364.923837,742.671047
cvs,rule_cfd,0.237339,0.006533
cvs,rule_atd,0.365735,0.600296


In [9]:
# Average both columns over the case studies, grouped by rule metric (uses df from the previous cell).
print("Avg Rule Accuracy Comparison (White-Box).")
df.groupby("Metric").mean().style.highlight_min(axis=1, color='green')

Avg Rule Accuracy Comparison (White-Box).


Unnamed: 0_level_0,simod,DBPS
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1
rule_atd,182.609877,174.450082
rule_cfd,0.232761,0.069019
rule_etd,32.066359,9.703796
rule_wtd,882.366322,812.444997


In [10]:
def generate_entropy_comparison_table(depth):
    """Build the entropy table of every baseline against both DBPS variants.

    One row is produced per (case study, metric) pair over ``casestudies`` x
    ``entropy_metrics``. Baseline columns are read from ``sota_data``; the
    ``prob_DBPS`` and ``det_DBPS`` columns are the means stored in
    ``my_data["prob"]`` and ``my_data["det"]`` for ``depth``. Entries that
    cannot be found become ``nan``.

    Args:
        depth: Depth whose DBPS results are reported.

    Returns:
        tuple: ``(df, styled)`` - the frame indexed by
        ``["Case Study", "Metric"]`` and a Styler that marks each row's
        maximum in green and its minimum in red.
    """
    records = []
    for case in casestudies:
        for metric in entropy_metrics:
            record = {"Case Study": case, "Metric": metric}

            # Baselines: one column per method.
            for name in methods_sota:
                record[name] = sota_data.get(name, {}).get(case, {}).get(metric, np.nan)

            # Own variants: keep element 0 (the mean) of each stored tuple.
            prob_stored = my_data["prob"].get(case, {}).get(metric, {}).get(depth, (np.nan,))
            record["prob_DBPS"] = prob_stored[0]
            det_stored = my_data["det"].get(case, {}).get(metric, {}).get(depth, (np.nan,))
            record["det_DBPS"] = det_stored[0]

            records.append(record)

    df = pd.DataFrame(records).set_index(["Case Study", "Metric"])

    # Row-wise highlighting: largest value green, smallest value red.
    styler = df.style.highlight_max(axis=1, color='green')
    styled = styler.highlight_min(axis=1, color='red')
    return df, styled

# Build the entropy table at the configured depth.
# This rebinds `styled`, which the rule-accuracy cell also assigns.
df_max, styled = generate_entropy_comparison_table(best_depth)
print("Execution Time Entropy Comparison.")
styled

Execution Time Entropy Comparison.


Unnamed: 0_level_0,Unnamed: 1_level_0,simod,dsim,rims,prob_DBPS,det_DBPS
Case Study,Metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
purchasing,etd_entropy,1.331335,1.160079,1.196356,1.380686,0.564997
acr,etd_entropy,1.005804,0.740469,0.593001,1.149228,0.845624
cvs,etd_entropy,0.640277,0.566857,0.5752,0.366053,0.078767
bpi12,etd_entropy,1.654026,0.431299,0.557028,2.245392,0.599924
bpi17,etd_entropy,1.975552,0.603484,0.611401,2.208731,0.758401


In [11]:
# Average every column over the case studies, grouped by metric (uses df_max from the previous cell).
df_max.groupby("Metric").mean()

Unnamed: 0_level_0,simod,dsim,rims,prob_DBPS,det_DBPS
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
etd_entropy,1.321399,0.700438,0.706598,1.470018,0.569543
