In [96]:
import os.path as op
import warnings
import numpy as np
import pandas as pd
import statsmodels.api as sm

In [92]:
proj_dir = "/Users/dschonhaut/box/projects/leads_tau_spread"
tau_rois = pd.read_excel(
    op.join(proj_dir, "data", "FTP_PETCore_Analysis_2022-12-15.xlsx"), "data"
)
tau_rois = tau_rois.sort_values(["subj", "visit"])

print("tau_rois: {}".format(tau_rois.shape))

tau_rois: (641, 159)


In [93]:
cols = {
    "demo": [
        x
        for x in tau_rois.columns
        if np.all((not x.endswith("_suvr"), not x.endswith("_vol")))
    ],
    "suvr": [x for x in tau_rois.columns if x.endswith("_suvr")],
    "vol": [x for x in tau_rois.columns if x.endswith("_vol")],
}

for k, v in cols.items():
    print("{}: {} columns".format(k, len(v)))

demo: 15 columns
suvr: 72 columns
vol: 72 columns


In [90]:
res.params

const                  0.854061
sex_m                  0.035570
age_at_baseline_pet    0.005398
dtype: float64

In [97]:
warnings.filterwarnings("ignore")

# W-score values against the control group.
tau_rois_w = tau_rois.loc[:, cols["demo"]].copy()
cols_to_wscore = cols["suvr"] + cols["vol"]

# Find baseline visits for controls.
idx = tau_rois.query("(dx=='CON') & (visit==1)").index

params = {}
for col in cols_to_wscore:
    # Regress column values against control age and sex.
    mod = sm.OLS(
        endog=tau_rois.loc[idx, col],
        exog=sm.add_constant(tau_rois.loc[idx, ["age_at_baseline_pet", "sex_m"]]),
    )
    res = mod.fit()

    # Log the model results.
    params[col] = {
        "nobs": idx.size,
        "r": np.sqrt(res.rsquared),
        "icpt": res.params["const"],
        "beta_age": res.params["age_at_baseline_pet"],
        "t_age": res.tvalues["age_at_baseline_pet"],
        "p_age": res.pvalues["age_at_baseline_pet"],
        "beta_sex": res.params["sex_m"],
        "t_sex": res.tvalues["sex_m"],
        "p_sex": res.pvalues["sex_m"],
        "mean_resid": np.mean(res.resid),
        "std_resid": np.std(res.resid),
    }

    # Predict test score from age for all subjects (controls + patients).
    resid = tau_rois[col] - res.predict(
        exog=sm.add_constant(tau_rois[["age_at_baseline_pet", "sex_m"]])
    )

    # Z-score the residuals to obtain W-scores. Mean and standard deviation
    # W-score in the control group should be 0 and 1, respectively.
    _mean = params[col]["mean_resid"]  # should be 0
    _std = params[col]["std_resid"]
    tau_rois_w.loc[:, col] = (resid - _mean) / _std
    
warnings.resetwarnings()

In [103]:
np.sort(np.mean(tau_rois_w.loc[idx, cols_to_wscore], 0).values)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1.])

In [111]:
tau_rois_w.loc[tau_rois.query("(dx=='EOAD') & (visit==1)").index, cols["suvr"]].mean(axis=0).sort_values()

l_rostralanteriorcingulate_suvr     2.356529
r_hippocampus_suvr                  2.391099
r_rostralanteriorcingulate_suvr     2.533800
l_hippocampus_suvr                  2.615459
r_temporalpole_suvr                 3.236199
                                     ...    
l_superiorparietal_suvr            13.044196
r_inferiorparietal_suvr            13.528589
r_supramarginal_suvr               13.721827
r_caudalmiddlefrontal_suvr         14.721389
l_caudalmiddlefrontal_suvr         16.507058
Length: 72, dtype: float64

In [20]:
for col in cols:
    print(col, len(col))

demo 4
suvr 4
vol 3


In [11]:
tau_rois.insert(1, "tracer", "ftp")
rename_cols = {
    "ID": "subj",
    "FTPPET_date": "scan_date",
    "ImageID": "image_id",
    "CohortAssgn": "dx",
}

Unnamed: 0,ID,FTPPET_Date,ImageID,CohortAssgn,ScalingFactor_InfCerebGray,ScalingFactor_ErodedWM,Assigned_MRIBASED_MetaROI_ADNIcutoff_1p2,MetaROI_MRIBASED_SUVR,Braak_1_MRIBASED_SUVR,Braak_12_MRIBASED_SUVR,...,ctx_rh_rostralanteriorcingulate_ClustSize,ctx_rh_rostralmiddlefrontal_ClustSize,ctx_rh_superiorfrontal_ClustSize,ctx_rh_superiorparietal_ClustSize,ctx_rh_superiortemporal_ClustSize,ctx_rh_supramarginal_ClustSize,ctx_rh_frontalpole_ClustSize,ctx_rh_temporalpole_ClustSize,ctx_rh_transversetemporal_ClustSize,ctx_rh_insula_ClustSize
0,LDS0070120,2019-06-20,I1182905,CN,0.9835,1.1526,0,1.158394,1.240590,1.234375,...,1750,22319,29378,9263,14476,7245,1448,2695,1034,9249
1,LDS0070166,2019-08-21,I1226127,EOAD,0.9838,1.5729,1,2.862929,1.932792,1.748436,...,1840,12147,17773,10002,10505,7714,1319,3021,921,6746
2,LDS0070166,2020-09-11,I1335994,EOAD,0.9509,1.4784,1,2.544361,1.839366,1.710155,...,1737,10534,17368,8952,10206,7178,1292,2822,875,6639
3,LDS0070166,2021-10-29,I1519134,EOAD,0.9819,1.5746,1,2.702192,1.943379,1.750326,...,1668,10036,16449,8910,9584,6750,1392,2830,782,6383
4,LDS0070166,2022-11-04,I1640345,EOAD,0.9338,1.4722,1,2.432061,1.819350,1.756143,...,1665,9282,16516,8405,9585,6125,1330,2752,818,6563
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
640,LDS9410412,2021-08-31,I1487527,EOnonAD,0.9813,1.2082,1,1.217195,1.246210,1.313947,...,2284,17205,21468,15207,11495,10522,1475,2894,1058,6867
641,LDS9410450,2022-02-09,I1548968,EOAD,0.9832,1.2816,1,1.677261,1.505026,1.425720,...,2091,12446,16683,9647,9455,7241,1474,2874,863,5730
642,LDS9410459,2022-02-01,I1542147,EOAD,0.9828,1.1536,1,1.580741,1.327861,1.314396,...,2104,19366,25145,13214,12296,9671,1432,2853,1271,7350
643,LDS9410487,2022-05-17,I1583509,EOnonAD,0.9086,1.0382,0,1.162100,1.126608,1.262715,...,1753,13765,18913,10579,11391,8217,1111,2631,880,5989
