In [None]:
import sys
sys.path.append('/causal-discovery')

from cdrl.agent.mcts.mcts_agent import *
import numpy as np
import pandas as pd

from cdrl.io.storage import EvaluationStorage
from cdrl.io.file_paths import FilePaths

# Instance names: "sachs" followed by "syntren1" ... "syntren10".
instances = ["sachs", *(f"syntren{d}" for d in range(1, 11))]

# Location handle for the aggregated outputs; its `figures_dir` is where the
# LaTeX tables are written further down.
fp_out = FilePaths('/experiment_data', 'aggregate_cdrl')

# One "<instance>_primary" experiment id per instance, in the same order.
experiment_ids = [f"{inst}_primary" for inst in instances]

In [None]:
# Metrics extracted for each result group stored in an evaluation entry.
metrics_to_display = {
    "construct": ["shd", "fdr", "tpr", "reward"],
    "prune_cam": ["shd", "fdr", "tpr"],
}

def get_eval_df(experiment_ids, which_results="construct", collapse_syntren=True):
    """Collect evaluation metrics of several experiments into one long-format frame.

    For every experiment id the "eval" metrics data is read through
    ``EvaluationStorage``; each entry contributes one row per metric listed in
    ``metrics_to_display[which_results]``.

    Args:
        experiment_ids: iterable of experiment ids. The text before the first
            "_" of an id is used as the instance name.
        which_results: key into ``metrics_to_display`` and into each entry's
            ``"results"`` mapping.
        collapse_syntren: if true, every instance whose name starts with
            "syntren" is reported under the single name "syntren".

    Returns:
        DataFrame with columns ``metric``, ``value``, ``agent``, ``instance``.
        The columns are present even when no rows were collected, so callers
        can still select or pivot on them.

    Raises:
        KeyError: if ``which_results`` is not a key of ``metrics_to_display``
            (raised up front, before any storage is opened).
    """
    columns = ["metric", "value", "agent", "instance"]
    # Loop-invariant: resolve the metric list once instead of per experiment.
    metrics = metrics_to_display[which_results]

    all_eval_data = []
    for exp_id in experiment_ids:
        storage = EvaluationStorage(FilePaths('/experiment_data', exp_id))

        # The instance name depends only on the experiment id.
        instance = exp_id.split("_")[0]
        if collapse_syntren and instance.startswith("syntren"):
            instance = "syntren"

        for entry in storage.get_metrics_data("eval"):
            # All agents whose name starts with "uct" are grouped under "uct".
            agent = "uct" if entry["agent"].startswith("uct") else entry["agent"]
            results = entry["results"][which_results]

            for metric in metrics:
                all_eval_data.append({
                    "metric": metric,
                    "value": results[metric],
                    "agent": agent,
                    "instance": instance,
                })

    return pd.DataFrame(all_eval_data, columns=columns)

In [None]:
# Long-format table of the "construct" metrics for all experiment ids
# (collapse_syntren keeps its default, so "syntren*" instances share one name).
construct_df = get_eval_df(experiment_ids, which_results="construct")

In [None]:
# Cell output: show the long-format construct table.
construct_df

In [None]:
# Aggregate the long table into one row per (instance, metric) and one column
# per agent, using pivot_table's default aggregation; the last line displays it.
construct_pivot = pd.pivot_table(
    construct_df,
    index=["instance", "metric"],
    columns=["agent"],
)
construct_pivot

In [None]:
# Long-format table of the "prune_cam" metrics for the same experiment ids.
prune_df = get_eval_df(experiment_ids, which_results="prune_cam")

In [None]:
# Aggregate the pruning results into one row per (instance, metric) and one
# column per agent, using pivot_table's default aggregation; displayed below.
prune_pivot = pd.pivot_table(
    prune_df,
    index=["instance", "metric"],
    columns=["agent"],
)
prune_pivot

In [None]:
# Pruning metrics again, but with collapse_syntren=False so every "syntren*"
# instance keeps its own name instead of being merged into "syntren".
raw_prune_df = get_eval_df(experiment_ids, which_results="prune_cam", collapse_syntren=False)

In [None]:
# Discard the "sachs" rows so only the remaining instances are ranked.
raw_prune_df = raw_prune_df[raw_prune_df.instance != "sachs"]

# One row per (instance, metric), one column per agent.
rankings_pivot = raw_prune_df.pivot_table(index=["instance", "metric"], columns=["agent"])

# Strip the outer column level left by pivot_table and remove the "notears"
# column, which is excluded from the ranking.
rankings_pivot = rankings_pivot.droplevel(0, axis=1).drop(columns=["notears"])

# Round-trip through records so "instance" and "metric" become ordinary columns.
rankings_pivot = pd.DataFrame(rankings_pivot.to_records())

In [None]:
# The first two columns of rankings_pivot are skipped; the remaining ones are
# treated as the agents to rank. The list is the same for every row.
all_algos = rankings_pivot.columns.tolist()[2:]

rankings_data = []
for row in rankings_pivot.itertuples():
    metric = getattr(row, 'metric')
    perfs_arr = np.array([getattr(row, algo) for algo in all_algos])

    # Double argsort gives each value its 0-based position in ascending order;
    # adding 1 makes the smallest value rank 1.
    # NOTE(review): ranks are ascending for every metric, so for "tpr" the
    # smallest value receives rank 1. A sign flip for "tpr" existed here as
    # commented-out code; confirm which direction is intended.
    perfs_ranked = np.argsort(np.argsort(perfs_arr)) + 1

    # Reciprocal rank per agent for this (instance, metric) row.
    rrs = 1.0 / perfs_ranked

    for algo, rr in zip(all_algos, rrs):
        rankings_data.append({"metric": metric, "agent": algo, "rr": rr})

rdf = pd.DataFrame(rankings_data)

# Aggregate the reciprocal ranks per metric and agent (pivot_table default
# aggregation), then flatten into a plain table labelled with instance "syntren".
rdfp = rdf.pivot_table(index=["metric"], columns=["agent"])
rdfp.columns = rdfp.columns.droplevel(0)
rdfp["instance"] = "syntren"
rdfp = pd.DataFrame(rdfp.to_records())

# Flatten the pruning pivot: drop the outer column level and turn the
# (instance, metric) index into ordinary columns. DataFrame.copy() is used
# instead of deepcopy so the cell does not depend on a name that is only
# available if the star import happens to export it.
pp_copy = prune_pivot.copy()
pp_copy.columns = pp_copy.columns.droplevel(0)
mdfp = pd.DataFrame(pp_copy.to_records())

# Same flattening for the construct pivot.
cdfp = construct_pivot.copy()
cdfp.columns = cdfp.columns.droplevel(0)
cdfp = pd.DataFrame(cdfp.to_records())

# These agents get the sentinel -999 in the construct table; the LaTeX
# post-processing below renders "-999.000" as "---".
for excluded_agent in ["cam", "lingam", "notears", "ges", "pc"]:
    cdfp[excluded_agent] = -999.


In [None]:
import scipy as sp

def compute_ci(data, confidence=0.95):
    """Return the half-width of a Student-t confidence interval for the mean.

    Args:
        data: sequence of numeric observations.
        confidence: two-sided confidence level, 0.95 by default.

    Returns:
        ``sem(data) * t.ppf((1 + confidence) / 2, n - 1)`` for ``n >= 2``
        observations, ``0.`` for a single observation, and NaN for empty input
        (the same value the computation would yield, without the warnings).
    """
    # Import the submodule explicitly: a bare `import scipy` does not
    # guarantee that `scipy.stats` is loaded on every SciPy version.
    from scipy import stats

    n = len(data)
    if n == 0:
        # No observations: the interval is undefined.
        return float("nan")
    if n == 1:
        # A single observation has no spread to estimate.
        return 0.

    a = np.array(data)
    se = stats.sem(a)
    h = se * stats.t.ppf((1 + confidence) / 2., n - 1)
    return h

def augment_with_cis(results_pivot, orig_df):
    """Replace agent columns by "<value>±<ci>" strings.

    Args:
        results_pivot: flat table whose first two columns are skipped and
            whose remaining columns are agent names; it must contain
            "metric" and "instance" columns identifying each row.
        orig_df: long-format frame with "metric", "instance", "agent" and
            "value" columns holding the individual observations.

    Returns:
        A copy of ``results_pivot`` in which every agent column, except the
        ones listed in ``no_ci_agents``, holds strings of the form
        ``f"{value:.3f}±{ci:.3f}"``, where ``ci`` is ``compute_ci`` of the
        matching observations in ``orig_df``. The input frame is not modified.
    """
    # Agents whose columns are left untouched (no interval is attached).
    no_ci_agents = {"greedy", "cam", "notears", "lingam", "ges", "pc"}

    # DataFrame.copy() is a deep copy by default and avoids relying on a
    # `deepcopy` name that this notebook never imports explicitly.
    pivot_cp = results_pivot.copy()

    all_algos = [a for a in results_pivot.columns.tolist()[2:] if a not in no_ci_agents]

    for algo in all_algos:
        # Restrict to this agent once instead of re-filtering for every row.
        algo_entries = orig_df[orig_df["agent"] == algo]

        formatted = []
        row_iter = zip(results_pivot[algo], results_pivot["metric"], results_pivot["instance"])
        for value, metric, instance in row_iter:
            relevant_entries = algo_entries[(algo_entries["metric"] == metric) &
                                            (algo_entries["instance"] == instance)]
            ci = compute_ci(relevant_entries["value"].tolist())
            formatted.append(f"{value:.3f}±{ci:.3f}")

        # Written in one step, so no temporary "<algo>_ci" column is created
        # that could clash with an existing column of that name.
        pivot_cp[algo] = formatted

    return pivot_cp



In [None]:
# Construct table with "<value>±<ci>" strings, computed from the long-format
# construct data; the last line displays it.
cdfp_final = augment_with_cis(cdfp, construct_df)
cdfp_final

In [None]:
# Pruning table with "<value>±<ci>" strings, computed from the long-format
# pruning data; the last line displays it.
mdfp_final = augment_with_cis(mdfp, prune_df)
mdfp_final

In [None]:
import re

def prepare_and_write_latex(df, which_results="construct", file_suffix=""):
    """Render ``df`` as a LaTeX table file and return the prepared frame.

    The frame is reduced to the key columns followed by the fixed agent
    columns, sorted by the key columns (metrics ordered reward, tpr, fdr,
    shd), rendered with ``DataFrame.to_latex`` and post-processed with an
    ordered list of regex substitutions. The result is written to
    ``fp_out.figures_dir / f"{which_results}_final[_<file_suffix>].tex"``.

    Args:
        df: table containing "metric", every agent column listed in
            ``agent_cols`` and, depending on ``which_results``, "instance"
            and "phase".
        which_results: "joint" uses the key columns phase/instance/metric;
            "prune_cam" uses instance/metric and marks the "notears" cells of
            "syntren" rows with -100 (rendered as a times sign); any other
            value uses instance/metric when ``file_suffix`` is empty and
            metric only otherwise.
        file_suffix: optional suffix appended to the output file name.

    Returns:
        The reordered, sorted frame with agent columns renamed to their
        display names (the input frame is left unmodified).

    Raises:
        KeyError: if ``df`` lacks one of the required columns.
    """
    agent_cols = ["uct", "rlbic", "greedy", "randomshooting", "random",
                  "cam", "lingam", "notears", "ges", "pc"]

    # The key columns drive column order, sort order and the column format,
    # so the three always agree with each other.
    if which_results == "joint":
        key_cols = ["phase", "instance", "metric"]
    elif which_results == "prune_cam" or file_suffix == "":
        key_cols = ["instance", "metric"]
    else:
        key_cols = ["metric"]
    colorder = key_cols + agent_cols

    agent_display_names = {"uct": "CD-UCT",
                           "rlbic": "RL-BIC",
                           "greedy": "Greedy Search",
                           "random": "Uniform Sampling",
                           "randomshooting": "Random Search",
                           "cam": "CAM",
                           "lingam": "LiNGAM",
                           "notears": "NOTEARS",
                           "ges": "GES",
                           "pc": "PC"
                           }

    # Work on an explicit copy: assigning into a column selection of the
    # caller's frame triggers SettingWithCopyWarning and may not stick.
    df = df[colorder].copy()
    df['metric'] = pd.Categorical(df['metric'], categories=['reward', 'tpr', 'fdr', 'shd'], ordered=True)
    df = df.sort_values(by=key_cols)

    if which_results == "prune_cam":
        # Sentinel that the substitutions below turn into a times sign.
        df.loc[df["instance"] == "syntren", ["notears"]] = -100
    df = df.rename(columns=agent_display_names)

    suffix_part = '_' + file_suffix if file_suffix != '' else ''
    texfile = str(fp_out.figures_dir / f"{which_results}_final{suffix_part}.tex")

    colformat = f"{'c' * len(key_cols)}|" + ("r" * len(agent_cols))
    # With buf=None, to_latex returns the table as a string, so the file is
    # written exactly once after post-processing.
    raw_content = df.to_latex(buf=None, float_format="{:0.3f}".format, index=False, column_format=colformat)

    # Ordered (pattern, replacement) pairs; later pairs see the output of
    # earlier ones. Every backslash in a replacement is doubled because
    # re.sub interprets escapes in the template, and unknown letter escapes
    # such as \u, \d or \i raise re.error on Python 3.7+.
    replacements = [
        (r"instance", r""),
        (r"agg", r""),
        (r"metric", r""),
        (r"phase", r"Phase"),
        (r"construct", r"\\textbf{Construct}"),
        (r"prune", r"\\textbf{Prune}"),

        (r"reward", r"Reward $\\uparrow$"),
        (r"tpr", r"TPR $\\uparrow$"),
        (r"fdr", r"FDR $\\downarrow$"),
        (r"shd", r"SHD $\\downarrow$"),
        # The dot is escaped so only the literal sentinel values match.
        (r"-100\.000", r"$\\times$"),
        (r"-999\.000", r"---"),

        (r"sachs", r"\\textit{Sachs}"),
        (r"syntren", r"\\textit{SynTReN}"),
        (r"mrr", r"MRR"),

        (r"nan±nan", r"$\\infty$"),
        (r"NaN", r"$\\infty$"),
        (r"nan", r"$\\infty$"),

        (r"±(\d+\.\d+)", r"\\tiny{$\\pm\g<1>$}"),
        (r"±---", r"\\tiny{$\\pm0.000$}"),
    ]

    processed_content = raw_content
    for orig, targ in replacements:
        processed_content = re.sub(orig, targ, processed_content, flags=re.M)

    # Explicit encoding: the table may still contain non-ASCII characters
    # such as "±", and the context manager closes the file on errors too.
    with open(texfile, 'w', encoding="utf-8") as g:
        g.write(processed_content)

    return df

In [None]:
# Tag each table with its phase on a private copy. DataFrame.copy() replaces
# deepcopy, which this notebook never imports explicitly.
joint_cdfp = cdfp_final.copy()
joint_cdfp["phase"] = "construct"

joint_mdfp = mdfp_final.copy()
joint_mdfp["phase"] = "prune"

# Stack both phases and write the combined table; the returned frame is the
# cell output.
joint_df = pd.concat([joint_cdfp, joint_mdfp])
prepare_and_write_latex(joint_df, which_results="joint")

In [None]:
# NOTE(review): this rebinds the notebook-wide `experiment_ids`, so re-running
# an earlier cell after this one will load the scale-up experiment instead of
# the primary ones. Consider a dedicated name once no other cell relies on it.
experiment_ids = ["synth50qr_scaleup"]

scaleup_df = get_eval_df(experiment_ids, which_results="construct")

scaleup_pivot = scaleup_df.pivot_table(columns=["agent"], index=["instance", "metric"])

# Flatten the pivot into a plain table. DataFrame.copy() replaces deepcopy,
# which this notebook never imports explicitly.
sdfp = scaleup_pivot.copy()
sdfp.columns = sdfp.columns.droplevel(0)
sdfp = pd.DataFrame(sdfp.to_records())

sdfp_final = augment_with_cis(sdfp, scaleup_df)
# The "rlbic" column is filled with NaN, which the LaTeX post-processing
# renders as an infinity symbol.
sdfp_final["rlbic"] = float("nan")

prepare_and_write_latex(sdfp_final, which_results="construct", file_suffix="scaleup")

In [None]:
# Cell output: show the scale-up table (including the NaN-filled "rlbic" column).
sdfp_final