```
This file is part of Estimation of Causal Effects in the Alzheimer's Continuum (Causal-AD).

Causal-AD is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Causal-AD is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Causal-AD. If not, see <https://www.gnu.org/licenses/>.
```

# Aggregate results on semi-synthetic data

In [None]:
from colorsys import rgb_to_hls
from pathlib import Path

import numpy as np
import pandas as pd

from matplotlib.cm import ScalarMappable
from matplotlib.colors import Normalize, to_hex

%matplotlib inline
import seaborn as sns

from causalad.ukb.configs import get_var_config

In [None]:
# Parameters
# Directory that is searched for "subst_conf_*_data.h5" files; the "metadata"
# table of each file is aggregated further down in this notebook.
subst_conf_dir: str = "outputs/ukb/subst_conf"
# CSV file that is loaded as the bias table (its first column becomes the index).
bias_csv: str = "outputs/ukb/experiments_summary.csv"

In [None]:
# Collect the "metadata" table stored in every substitute-confounder HDF5 file.
# Sorting makes the row order of `pvalues_long` independent of the order in
# which the file system happens to list the files.
data_files = sorted(Path(subst_conf_dir).glob("subst_conf_*_data.h5"))
if not data_files:
    # Fail with an explicit message instead of an unrelated NameError /
    # "No objects to concatenate" error further down.
    raise FileNotFoundError(
        f"no files matching 'subst_conf_*_data.h5' found in {subst_conf_dir!r}"
    )

pvalue_frames = []
for f in data_files:
    meta = pd.read_hdf(f, key="metadata")
    # Assigning a single label requires the metadata table to have exactly one
    # row; the label is built as "eval_" + file stem.
    meta.index = [f"eval_{f.stem}"]
    pvalue_frames.append(meta)

# Keep the notebook namespace free of loop temporaries.
del f, meta, data_files

# Long format: one row per file, indexed by its "eval_..." name.
pvalues_long = pd.concat(pvalue_frames).rename_axis(
    index="name")
del pvalue_frames

# Wide format: latent dimension in rows, method in columns.
# The method is the 4th "_"-separated token of the name with its last two
# characters removed (e.g. "ppca-2" -> "ppca"), i.e. a one-character dimension
# suffix is assumed.
pvalues = pvalues_long.reset_index().assign(
    method=lambda x: x["name"].str.split("_", expand=True).iloc[:, 3].str.slice(stop=-2)
).pivot(
    index="latent_dim", columns="method", values="pvalue"
)

pvalues.round(3)

In [None]:
def get_order():
    """Return the column order and display labels for the bias table.

    The configurations from ``get_var_config()`` are ordered by decreasing
    ``ratio_x_z[0] / ratio_x_z[1]``.

    Returns
    -------
    order : list of str
        Column names of the form ``"x{ratio_x_z[0]}_z{ratio_x_z[1]}"``.
    labels : dict
        Maps each column name to the label ``"{ratio_x_z[1]}/{ratio_x_z[0]}"``.
    """
    ratios = [cfg.ratio_x_z for cfg in get_var_config()]
    # Negated key + ascending (stable) sort == descending x/z ratio.
    ratios.sort(key=lambda r: -r[0] / r[1])

    order = []
    labels = {}
    for ratio in ratios:
        column = f"x{ratio[0]}_z{ratio[1]}"
        order.append(column)
        labels[column] = "{}/{}".format(ratio[1], ratio[0])
    return order, labels

In [None]:
# Load the bias table; its first column holds the experiment names.
bias = pd.read_csv(bias_csv, index_col=0)

# remove entries where PPC failed
bias.loc[pvalues_long.index[pvalues_long["pvalue"].le(0.1)], :] = np.nan

# Split experiment names such as "eval_subst_conf_<method>-<d>_data" into a
# method name and a (single-digit) dimension; names that do not match the
# pattern yield NaN in both columns.
method_index = (
    bias.index.to_series()
    .str.extract(r"eval_subst_conf_([a-z]+)-(\d)_data")
    .rename(columns={0: "Method", 1: "Dim"})
)
method_index.loc[:, "Method"] = method_index.loc[:, "Method"].str.upper()
method_index.loc[:, "Dim"] = method_index.loc[:, "Dim"].astype(object)

# Rows that do not follow the pattern above get a hand-written method name.
method_index.loc["eval_oracle_ukb_data_t", "Method"] = "Oracle"
method_index.loc["eval_obsconf_ukb_data_t", "Method"] = "Obs. Confounders"
method_index.loc["eval_noconf_ukb_data_t", "Method"] = "Non-Causal"

# Replace the flat name index by the (Method, Dim) pair.
bias.index = pd.MultiIndex.from_frame(method_index)

col_order, col_labels = get_order()

# Select and relabel the columns, name the column axis, then reverse the
# column order.
bias = (
    bias.loc[:, col_order]
    .rename(columns=col_labels)
    .rename_axis("𝜈ₛ/𝜈ₓ", axis=1)
    .iloc[:, ::-1]
)

In [None]:
def color_column(x: pd.Series):
    """Build per-cell CSS for one column of the bias table.

    Meant to be passed to ``Styler.apply(..., axis=0)``. Every cell gets a
    background color from seaborn's "flare" colormap and a contrasting text
    color; the smallest value of the column is additionally set in bold
    italics.

    Rows whose first index level equals ``"Oracle"`` are still colored, but
    they are ignored when determining the color scale limits and the
    smallest value.

    Parameters
    ----------
    x : pd.Series
        Values of a single column.

    Returns
    -------
    list of str
        One CSS declaration string per entry of ``x``, in the same order.
    """
    cmap = sns.color_palette("flare", as_cmap=True)

    # Blank out the oracle rows via a mask so that a column without an
    # "Oracle" row is handled as well.
    oracle_rows = x.index.get_level_values(0) == "Oracle"
    x_no_oracle = x.mask(oracle_rows)
    values = x_no_oracle.to_numpy(dtype=float)

    # Clip the color scale at the 1st/99th percentile to limit the influence
    # of extreme values.
    vmin, vmax = np.nanpercentile(values, [1, 99])
    mapper = ScalarMappable(cmap=cmap, norm=Normalize(
        vmin=vmin, vmax=vmax,
    ))
    rgba = mapper.to_rgba(x.values)
    bg_colors = [to_hex(c) for c in rgba]

    # Black text on light backgrounds (lightness > 0.5), light grey otherwise.
    hls = np.array([rgb_to_hls(*c) for c in rgba[:, :3]])
    fg_colors = np.where(hls[:, 1] > 0.5, "#000000", "#cecece")

    style = [f"background: {bg}; color: {fg};" for bg, fg in zip(bg_colors, fg_colors)]

    # Emphasise the best (smallest) non-oracle value. Ties are all emphasised
    # and a column without any finite non-oracle value is left as is; the
    # previous `int(np.flatnonzero(...))` raised a TypeError in both cases.
    if not np.isnan(values).all():
        best = np.nanmin(values)
        for pos in np.flatnonzero(values == best):
            style[pos] += " font-style: italic; font-weight: bold;"
    return style

In [None]:
# Render the bias table: three decimals per cell, colored column by column.
(
    bias.style
    .format("{:.3f}")
    .apply(color_column, axis=0)
)