```
This file is part of Estimation of Causal Effects in the Alzheimer's Continuum (Causal-AD).

Causal-AD is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Causal-AD is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Causal-AD. If not, see <https://www.gnu.org/licenses/>.
```

# Estimation of Average Causal Effect (ACE)

1. For Volumes only:
$$
y_i = \frac{\mathrm{Vol}_i}{\mathrm{TIV}_i}
$$
2. Box-Cox transform:
$$y_{i}^{(\lambda )}={\begin{cases}{\dfrac {(y_{i})^{\lambda }-1}{\lambda }}&{\text{if }}\lambda \neq 0,\\\ln (y_{i})&{\text{if }}\lambda =0,\end{cases}}$$
3. Standardize:
$$
\tilde{y}_i = \frac{ y_{i}^{(\lambda )} - \mathrm{mean}(y_{i}^{(\lambda)})}{\mathrm{sd}(y_{i}^{(\lambda )})}
$$
4. Regress on confounders:
* Age-only:
$$
\tilde{y}_i = \beta_1\text{Age}_i + \beta_2\text{Age}_i^2 + \varepsilon_i
$$

* Deconfounder:
$$
\tilde{y}_i = \mathbf{W}\mathbf{z}_i + \beta_1\text{Age}_i + \beta_2\text{Age}_i^2 + \beta_3\text{Education}_i + \beta_4\text{Gender}_i
+ \beta_5\log\left( \text{pTAU}_i + 1 \right) + \varepsilon_i
$$

5. Beta-Regression:
$$
    \frac{\text{ADAS}_i}{85} = \mathrm{logit}^{-1}(\beta_0 + (\tilde{y}_i - \hat{y}_i^\text{reg})\beta + \varepsilon_i)
$$

In [None]:
from itertools import product
from pathlib import Path
import pickle
from typing import List

from joblib import Parallel, delayed
from matplotlib.cm import get_cmap
import pandas as pd

from causalad.adni.ace import (
    AceEstimator,
    BetaRegPredictor,
    FeatureTransformer,
    Plotter,
    composite_ace,
    plot_composite_ace,
    barplot_composite_ace,
)

In [None]:
# Parameters
# Locations of the input data, the substitute-confounder files, and the
# model outputs, plus the number of joblib workers.
data_path: str = "outputs/adni/adni_data_t.h5"
subst_conf_dir: str = "outputs/adni/subst_conf"
models_dir: str = "outputs/adni/models"
n_jobs: int = 5

# Features for which causal effects are estimated. Entries containing "+"
# list several regions in one name (presumably combined downstream by the
# feature transformer -- confirm against causalad.adni.ace).
feats: List[str] = [
    "entorhinal_thickness",
    "Hippocampus",
    # Parietal lobe
    (
        "postcentral_thickness+inferiorparietal_thickness+supramarginal_thickness"
        "+precuneus_thickness+superiorparietal_thickness"
    ),
    "isthmuscingulate_thickness",
    # Temporal lobe
    (
        "bankssts_thickness+superiortemporal_thickness+fusiform_thickness"
        "+inferiortemporal_thickness+middletemporal_thickness"
    ),
    "Amygdala",
    "temporalpole_thickness",
    "CC",
    "parahippocampal_thickness",
    "Accumbens-area",
    # Frontal lobe I
    "paracentral_thickness+precentral_thickness",
]

In [None]:
# Turn the string parameters into Path objects so they can be joined with `/`.
models_dir, subst_conf_dir = Path(models_dir), Path(subst_conf_dir)

# Per-method CSV of sampled coefficients; all files live in `models_dir` and
# share the "samples_coef_adni_<suffix>.csv" naming scheme.
coef_paths = {
    method: models_dir / f"samples_coef_adni_{suffix}.csv"
    for method, suffix in (
        ("ppca", "ppca_subst_conf_dim6"),
        ("bpmf", "bpmf_subst_conf_dim6"),
        ("original", "original"),
        ("regressout", "age_residualized"),
        ("combat", "combat_residualized"),
    )
}

## PPCA

In [None]:
# Feature transformer for the PPCA setting: built from the data file and the
# path of the PPCA substitute-confounder file (passed as a plain string).
ft_ppca = FeatureTransformer(
    data_path,
    str(subst_conf_dir / "adni_ppca_subst_conf_dim6.h5")
)

In [None]:
# Read the test-index file. `read_csv(squeeze=True)` was deprecated in
# pandas 1.4 and removed in 2.0, so the frame is squeezed explicitly: a
# single-column file becomes a Series, anything else stays a DataFrame.
data_xy = pd.read_csv(
    models_dir / "test-idx_coef_adni_ppca_subst_conf_dim6.csv",
).squeeze("columns")

# Index labels of the features that do not occur in `data_xy`.
train_idx = ft_ppca.features.index.difference(data_xy)
# Cell output: number of remaining labels vs. total number of feature rows.
len(train_idx), ft_ppca.features.shape[0]

In [None]:
# Estimator using the PPCA feature transformer and the PPCA coefficient samples.
est = AceEstimator(ft_ppca, coef_paths["ppca"])

# Run `compute_ace` for every selected feature on `n_jobs` joblib workers and
# collect the results into a dict (each result is presumably a
# (feature name, effect) pair -- confirm against AceEstimator.compute_ace).
causal_effects_ppca = dict(
    Parallel(n_jobs=n_jobs, verbose=2)(
        delayed(est.compute_ace)(feat_name, train_idx) for feat_name in feats
    )
)

In [None]:
# Plot the PPCA-based effects; binding the result to `fig` keeps the cell
# from echoing the return value.
fig = Plotter(ft_ppca.features).plot_ace(causal_effects_ppca)

## BPMF

In [None]:
# Feature transformer for the BPMF setting: built from the data file and the
# path of the BPMF substitute-confounder file (passed as a plain string).
ft_bpmf = FeatureTransformer(
    data_path,
    str(subst_conf_dir / "adni_bpmf_subst_conf_dim6.h5")
)

In [None]:
# Estimator using the BPMF feature transformer and the BPMF coefficient samples.
est = AceEstimator(ft_bpmf, coef_paths["bpmf"])

# One `compute_ace` job per selected feature, spread over `n_jobs` workers;
# the collected results are turned into a dict.
causal_effects_bpmf = dict(
    Parallel(n_jobs=n_jobs, verbose=2)(
        delayed(est.compute_ace)(feat_name, train_idx) for feat_name in feats
    )
)

In [None]:
# Plot the BPMF-based effects; binding the result to `fig` keeps the cell
# from echoing the return value.
fig = Plotter(ft_bpmf.features).plot_ace(causal_effects_bpmf)

## Age-residualized

In [None]:
# Feature transformer for the regress-out baseline: built from the data file
# and the path of the age-residualized CSV (passed as a plain string).
ft_age_regout = FeatureTransformer(
    data_path,
    str(models_dir / "adni_age_residualized.csv")
)

In [None]:
# Estimator using the age-residualized transformer and its coefficient samples.
est = AceEstimator(ft_age_regout, coef_paths["regressout"])

# One `compute_ace` job per selected feature, spread over `n_jobs` workers;
# the collected results are turned into a dict.
causal_effects_age_regout = dict(
    Parallel(n_jobs=n_jobs, verbose=2)(
        delayed(est.compute_ace)(feat_name, train_idx) for feat_name in feats
    )
)

In [None]:
# Plot the regress-out effects; binding the result to `fig` keeps the cell
# from echoing the return value.
fig = Plotter(ft_age_regout.features).plot_ace(causal_effects_age_regout)

## Original

In [None]:
# Feature transformer for the non-causal baseline: same data file, but no
# residuals file is supplied (`residuals_path=None`).
ft_original = FeatureTransformer(
    data_path,
    residuals_path=None,
)

In [None]:
# Estimator for the non-causal baseline. It must use `ft_original` (the
# transformer built without a residuals file); the previous revision passed
# `ft_age_regout` here, a copy-paste slip from the age-residualized section
# that paired the "original" coefficients with age-residualized features.
est = AceEstimator(
    ft_original,
    coef_paths["original"],
)
# One `compute_ace` job per selected feature, spread over `n_jobs` workers.
causal_effects_original = Parallel(n_jobs=n_jobs, verbose=2)(
    delayed(est.compute_ace)(fname, train_idx) for fname in feats
)
# Collect the per-feature results into a dict.
causal_effects_original = dict(causal_effects_original)

In [None]:
# Plot the non-causal baseline effects; binding the result to `fig` keeps the
# cell from echoing the return value.
fig = Plotter(ft_original.features).plot_ace(causal_effects_original)

## ComBat

In [None]:
# Feature transformer for the ComBat baseline: built from the data file and
# the path of the ComBat-residualized CSV (passed as a plain string).
ft_combat = FeatureTransformer(
    data_path,
    str(models_dir / "adni_combat_residualized.csv")
)

In [None]:
# Estimator using the ComBat feature transformer and its coefficient samples.
est = AceEstimator(
    ft_combat,
    coef_paths["combat"],
)
# Use the notebook's `n_jobs` parameter like every other section; the worker
# count was hard-coded to 5 here and silently ignored the parameter.
causal_effects_combat = Parallel(n_jobs=n_jobs, verbose=2)(
    delayed(est.compute_ace)(fname, train_idx) for fname in feats
)
# Collect the per-feature results into a dict.
causal_effects_combat = dict(causal_effects_combat)

In [None]:
# Plot the ComBat-based effects; binding the result to `fig` keeps the cell
# from echoing the return value.
fig = Plotter(ft_combat.features).plot_ace(causal_effects_combat)

## Comparison

Save results

In [None]:
# Display names of the methods, in the same order as `causal_effects_list`.
names = ["Proposed (PPCA)", "Proposed (BPMF)", "Regress-Out", "Non-Causal", "ComBat"]
causal_effects_list = [
    causal_effects_ppca,
    causal_effects_bpmf,
    causal_effects_age_regout,
    causal_effects_original,
    causal_effects_combat,
]

# Store both lists together as one pickled tuple next to the model outputs.
with (models_dir / "causal_effects.pkl").open("wb") as fout:
    pickle.dump(
        (causal_effects_list, names),
        fout,
        protocol=pickle.HIGHEST_PROTOCOL,
    )

# Drop the closed file handle from the notebook namespace.
del fout

In [None]:
# NOTE(review): `matplotlib.cm.get_cmap` is deprecated in recent Matplotlib
# releases; switch to `matplotlib.colormaps["Set1"]` when upgrading.
cm = get_cmap("Set1")

# Set1 colours picked by position: 0 red, 3 purple, 4 orange, 1 blue, 2 green.
palette = [cm.colors[idx] for idx in (0, 3, 4, 1, 2)]

method_order = ["Non-Causal", "Regress-Out", "ComBat", "Proposed (PPCA)", "Proposed (BPMF)"]

# One style dict per method; colours are assigned in `method_order` order.
styles = {
    method: {"color": colour} for method, colour in zip(method_order, palette)
}
# The first two baselines additionally get a non-solid line style.
styles["Non-Causal"]["linestyle"] = "dotted"
styles["Regress-Out"]["linestyle"] = "dashed"

In [None]:
# Plot all methods together: columns follow `feats`, wrapped after 4 columns,
# using the per-method styles defined for the comparison.
p = Plotter(ft_ppca.features, col_order=feats, wrap_cols=4, max_adas=50, legend_out=True)
fig = p.compare_ace(causal_effects_list, names, styles)

## Composite ACE of All Selected ROIs

Compute $$\mathbb{E}[\mathrm{ADAS}\,|\, do(x_{q_{25}})] - \mathbb{E}[\mathrm{ADAS}\,|\, do(x_\text{median})]$$

In [None]:
# One beta-regression predictor per method, loaded from its coefficient file.
predictors = {
    method: BetaRegPredictor(coef_file) for method, coef_file in coef_paths.items()
}
effect = composite_ace(
    features=ft_ppca.features.loc[:, feats],
    transform_func=ft_ppca.transform_data,
    predictors=predictors,
)
# The temporary is only needed for the call above.
del predictors

effect_diff = (
    effect.xs("mean", level=1, axis=1)  # columns whose level-1 label is "mean"
    .iloc[:, ::-1]  # reverse the column order
    .diff(axis=1)  # each column minus the column before it
    .iloc[:, 1:]  # drop the first column, which has no predecessor
)

In [None]:
# Display the composite effects rounded to one decimal place.
effect.round(1)

In [None]:
# Display the effect differences rounded to one decimal place.
effect_diff.round(1)

In [None]:
# Compare each of the first two rows of the first `effect_diff` column with
# every remaining row and print the rounded difference.
# `Series.iteritems()` was removed in pandas 2.0; `items()` is the equivalent
# that works on old and new pandas alike.
for (na, a), (nb, b) in product(effect_diff.iloc[:2, 0].items(), effect_diff.iloc[2:, 0].items()):
    print(na, "-", nb, "=>", round(a - b, 1))

In [None]:
# Row order for the plots and the display label for each row.
# Note: this rebinds `names`, and the `del` at the end of the cell removes it.
order = ["original", "regressout", "combat", "ppca", "bpmf"]
names = ["Non-Causal", "Regress-Out", "ComBat", "Proposed\n(PPCA)", "Proposed\n(BPMF)"]
# Reorder the rows of `effect` and relabel them with the display names.
df = effect.loc[order].rename(index=dict(zip(order, names)))

# First plot: only the columns whose level-1 label is "mean".
ax = plot_composite_ace(
    df.xs("mean", level=1, axis=1),
    palette,
)
# Second plot: bar plot of the full table; `ax` is rebound to its result.
ax = barplot_composite_ace(df, figsize=(6, 4.25))

# Remove the temporaries from the notebook namespace.
del df, order, names