This file is part of Estimation of Causal Effects in the Alzheimer's Continuum (Causal-AD).

Causal-AD is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

Causal-AD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with Causal-AD. If not, see <https://www.gnu.org/licenses/>.

# Step 1: Estimate Substitute Confounders

In [1]:
from pathlib import Path
import numpy as np
import pandas as pd

from causalad.adni_experiments import (
    load_adni_data,
    apply_volume_transforms,
    get_bpmf_deconfounder,
    get_ppca_deconfounder,
    get_regressed_out_volumes,
)

In [2]:
# Load the three objects returned by load_adni_data with outlier removal
# switched on, then report the dimensions of the first two.
data_vols, data_extra, outcome = load_adni_data(remove_outliers=True)
print(*(table.shape for table in (data_vols, data_extra)))

INFO:causalad.adni_experiments:
12-16               923
more_than_16        905
less_or_equal_12    325
Name: EDU-ATTAIN, dtype: int64

INFO:causalad.utils:89 outliers removed
INFO:causalad.adni_experiments:Dropping 11 with missing or zero ADAS13

INFO:causalad.adni_experiments:
A+/T+/N+    398
A+/T+/N-    268
A+/T-/N-     99
Name: ATN_status, dtype: int64

INFO:causalad.adni_experiments:
APGEN2  2     3    4
APGEN1              
2       6   172   49
3       5  1171  837
4       2    18  227



(711, 22) (711, 12)


## Apply Data Transformations

In [3]:
# Transformed copy of the volume table; this is the input to every model
# fitted later in the notebook (regress-out, PPCA and BPMF).
# NOTE(review): the exact transforms live in causalad.adni_experiments — see there.
data_vols_t = apply_volume_transforms(data_vols)

In [4]:
# Join outcome, extra covariates and transformed volumes column-wise
# (axis=1) and store the combined table as CSV.
pd.concat([outcome, data_extra, data_vols_t], axis=1).to_csv("data/adni-transformed.csv")

## Fit Regress-out Model

This may take a while.

In [5]:
# Shared settings for the rest of the notebook:
#   seed       -- forwarded as random_state to every model fit below.
#   latent_dim -- passed to the PPCA and BPMF fits; also names the output folder.
#   base_dir   -- folder that receives all CSV files written below.
seed = 1501
latent_dim = 6
base_dir = Path(f"data/outputs/adni/dim{latent_dim}")

# exist_ok=True keeps the cell idempotent on re-runs and removes the
# check-then-create race of a separate exists() test. Unlike the silent skip
# of the exists() check, it raises if the path exists but is not a directory.
base_dir.mkdir(parents=True, exist_ok=True)

In [6]:
# Regress-out baseline computed from the transformed volumes and the extra
# covariates; the shared seed is passed as random_state.
# NOTE(review): presumably returns volume residuals after regressing out
# data_extra — confirm against causalad.adni_experiments.
resid_regress_out = get_regressed_out_volumes(data_vols_t, data_extra, random_state=seed)

In [7]:
# Assemble outcome, extra covariates and the regress-out result side by side
# (axis=1), write them under base_dir, and echo the absolute file location.
file_regress_out = base_dir / "adni_aug_regressout.csv"
output_regress_out = pd.concat([outcome, data_extra, resid_regress_out], axis=1)
output_regress_out.to_csv(file_regress_out)

print(file_regress_out.resolve())

/notebooks/data/outputs/adni/dim6/adni_aug_regressout.csv


## Fit PPCA Model

This may take a while.

In [8]:
# Fit the PPCA deconfounder on the transformed volumes and extra covariates,
# using the shared latent dimension and seed. Three values come back.
# NOTE(review): presumably the substitute confounders, the residuals and the
# overall p-value reported in the log — confirm against causalad.adni_experiments.
deconf_ppca, resid_ppca, pval_ppca = get_ppca_deconfounder(
    data_vols_t, data_extra, latent_dim=latent_dim, random_state=seed
)

INFO:causalad.adni_experiments:Overall p-value: 0.766112


In [9]:
# Assemble outcome, extra covariates and both PPCA outputs side by side
# (axis=1), write them under base_dir, and echo the absolute file location.
file_ppca = base_dir / "adni_aug_PPCA.csv"
output_ppca = pd.concat([outcome, data_extra, deconf_ppca, resid_ppca], axis=1)
output_ppca.to_csv(file_ppca)

print(file_ppca.resolve())

/notebooks/data/outputs/adni/dim6/adni_aug_PPCA.csv


## Fit BPMF Model

This may take a while.

In [10]:
# Fit the BPMF deconfounder on the transformed volumes and extra covariates,
# using the shared latent dimension and seed. Three values come back.
# NOTE(review): presumably the substitute confounders, the residuals and the
# overall p-value reported in the log — confirm against causalad.adni_experiments.
deconf_bpmf, resid_bpmf, pval_bpmf = get_bpmf_deconfounder(
    data_vols_t, data_extra, latent_dim=latent_dim, random_state=seed
)

  out = random_state.multivariate_normal(mean, cov, size)
INFO:causalad.adni_experiments:Overall p-value: 0.289536


In [11]:
# Assemble outcome, extra covariates and both BPMF outputs side by side
# (axis=1), write them under base_dir, and echo the absolute file location.
file_bpmf = base_dir / "adni_aug_BPMF.csv"
output_bpmf = pd.concat([outcome, data_extra, deconf_bpmf, resid_bpmf], axis=1)
output_bpmf.to_csv(file_bpmf)

print(file_bpmf.resolve())

# Store the p-values returned by the PPCA and BPMF fits in one labelled file.
pvalues = pd.Series([pval_ppca, pval_bpmf], index=["PPCA", "BPMF"], name="pvalue")
pvalues.to_csv(base_dir / "pvalue.csv", header=True, index=True)

/notebooks/data/outputs/adni/dim6/adni_aug_BPMF.csv


Continue with [Step 2: Estimate Effects](adni-estimate-effects.ipynb).