In [4]:
import sys
import os.path as op
from glob import glob
import itertools

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
from scipy.interpolate import interp1d
import seaborn as sns
import statsmodels.api as sm
from statsmodels.stats.multitest import multipletests
from patsy import dmatrix

# Widen pandas' display limits so long/wide frames are not truncated in cell output.
pd.set_option("display.max_rows", 100)
pd.set_option("display.max_columns", 999)
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error as mse

# Put ~/code/style on the import path before importing `colors`
# (presumably that is where the `colors` module lives — TODO confirm).
sys.path.append(op.join(op.expanduser("~"), "code", "style"))
from colors import get_colors

# NOTE(review): `co` and `palettes` are reassigned further down in this cell from
# get_plot_defaults(), so the values bound here are overwritten before later cells run.
co, palettes = get_colors()

from general.array import array_operations as aop
from general.basic.config import get_plot_defaults, set_rcparams

# Route matplotlib's rcParams through the project helper, then switch axis grids off.
# NOTE(review): this rebinds the `mpl.rcParams` attribute; whether set_rcparams returns
# the same object it was given is not visible here — confirm.
mpl.rcParams = set_rcparams(mpl.rcParams)
mpl.rcParams["axes.grid"] = False

# Unpack the plotting defaults into module-level names; a missing key yields None.
d = get_plot_defaults()
co, colws, font, lws, pad, palettes = (
    d.get(name, None)
    for name in ("colors", "colws", "font", "lws", "pad", "palettes")
)
from general.basic.helper_funcs import *
from general.basic.str_methods import *
import general.nifti.nifti_ops as nops

In [5]:
# Directory layout for this analysis; everything hangs off the project folder
# under the user's home directory.
proc_date = "2023-12-20"
_home_dir = op.expanduser("~")
proj_dir = op.join(_home_dir, "box/projects/leads_tau_spread")
fig_dir = op.join(proj_dir, "figs", "hai_2024")
ssheet_dir = op.join(proj_dir, "data", "ssheets")
loni_dir = op.join(ssheet_dir, "loni", f"download_{proc_date}")

In [20]:
# Load the aggregated tau ROI table and the longitudinal CDR-SB table.
tau_eoad = pd.read_csv(op.join(ssheet_dir, "tau-rois-agg_eoad-long_2024-02-06.csv"))
cdr_eoad = pd.read_csv(
    op.join(ssheet_dir, "cdr-sb_eoad-long_formatted_193subjs_2023-12-30.csv")
)

# Subjects of interest: more than one FTP visit AND at least one row in the CDR table.
_multi_scan = tau_eoad["ftp_visits"] > 1
eoad_long_subjs = np.intersect1d(tau_eoad.loc[_multi_scan, "subj"], cdr_eoad["subj"])

# Restrict both tables to those subjects.
# NOTE(review): the CDR table is additionally limited to cdr_visits > 1, which is
# stricter than the "1+ CDR scores" criterion used for the subject list and the
# printed summary — confirm this asymmetry is intended.
tau_eoad = tau_eoad[tau_eoad["subj"].isin(eoad_long_subjs)].reset_index(drop=True)
cdr_eoad = cdr_eoad[
    cdr_eoad["subj"].isin(eoad_long_subjs) & (cdr_eoad["cdr_visits"] > 1)
].reset_index(drop=True)

# Previous-visit SUVR and volume within each subject x ROI series: sort by visit,
# shift by one row inside each group, then let pandas align the result back onto
# tau_eoad by index. The first visit of every series gets NaN.
_prev_visit = (
    tau_eoad.sort_values(["subj", "roi", "visit"])
    .groupby(["subj", "roi"])[["suvr", "vol"]]
    .shift(1)
)
tau_eoad["suvr_last"] = _prev_visit["suvr"]
tau_eoad["vol_last"] = _prev_visit["vol"]

print(f"tau_eoad: {tau_eoad.shape}")
print(f"cdr_eoad: {cdr_eoad.shape}")
print(f"{len(eoad_long_subjs)} EOAD subjects with 2+ FTP scans and 1+ CDR scores")

tau_eoad: (54404, 41)
cdr_eoad: (490, 14)
187 EOAD subjects with 2+ FTP scans and 1+ CDR scores


In [21]:
# Stack CDR-SB and tau ROI measures into one long-format table that shares a
# common column layout: one row per subject x visit x measure ("key").
cols = ["subj", "visit", "key", "val_last", "val_annchg"]

# CDR-SB rows: every row is tagged with the constant key "cdr_sb".
_cdr_long = cdr_eoad[["subj", "visit", "cdr_sb_last", "cdr_sb_annchg_from_last"]].copy()
_cdr_long.insert(2, "key", "cdr_sb")
_cdr_long.columns = cols

# Tau rows: keep only the 'metarois' and 'earlyrois' parcellations; the ROI name
# becomes the key.
_keep_parc = tau_eoad["parc"].isin(["metarois", "earlyrois"])
_data = tau_eoad.loc[
    _keep_parc, ["subj", "visit", "roi", "suvr_last", "suvr_annchg_from_last"]
].reset_index(drop=True)
_data.columns = cols

# Combine, then order rows by measure, subject, and visit.
data = pd.concat([_cdr_long, _data], axis=0)
data = data.sort_values(["key", "subj", "visit"]).reset_index(drop=True)

print(f"data: {data.shape}")
print(f"{data['subj'].nunique()} subjects, {data['key'].nunique()} keys")

data: (4242, 5)
187 subjects, 9 keys


In [22]:
# Preview the first five rows of the combined long-format table.
data.head(n=5)

Unnamed: 0,subj,visit,key,val_last,val_annchg
0,LDS0070166,1,bltemp,,
1,LDS0070166,2,bltemp,3.05276,-0.345061
2,LDS0070166,3,bltemp,2.687151,0.150959
3,LDS0070166,4,bltemp,2.857845,-0.310755
4,LDS0070174,1,bltemp,,


In [None]:
class Trails(object):
    def __init__(self, data):
        self.data = data
        self.trails = self.fit_trails()

    def get_trails(self):
        trails = self.tau_all_agg.query("roi == 'Trails'")
        trails = trails.query("age >= 50")
        trails = trails.query("age <= 90")
        trails = trails.query