In [1]:
import numpy as np
np.random.seed(42)
import pandas as pd
import os
from tqdm import tqdm
from scipy.stats import spearmanr, pearsonr
from pingouin import partial_corr
np.random.seed(42)
from itertools import combinations, product
from loading_functions import get_hypothesis_data

In [2]:
df = pd.read_csv('eeg_mri_cognition_deidentified.csv', index_col=0)
df.columns[:30]

Index(['interval_mri-eeg_abs1', 'report_date_time', 'report_description',
       'report_status', 'report_type', 'age', 'sex', 'bmi', 'ahi', 'medbenzo',
       'medantidep', 'medsedative', 'medantieplipetic', 'medstimulant', 'mmse',
       'mean_gradient_f3-m2_w', 'mean_gradient_f4-m1_w',
       'mean_gradient_c3-m2_w', 'mean_gradient_o1-m2_w',
       'slow_bandpower_mean_f_w', 'slow_bandpower_mean_c_w',
       'slow_bandpower_mean_o_w', 'delta_bandpower_mean_f_w',
       'delta_bandpower_mean_c_w', 'delta_bandpower_mean_o_w',
       'slowdelta_bandpower_mean_c_w', 'theta_bandpower_mean_f_w',
       'theta_bandpower_mean_o_w', 'alpha_bandpower_mean_f_w',
       'sigma_bandpower_mean_f_w'],
      dtype='object')

In [3]:
# clip at 99% percentile:
df['slowdelta_bandpower_total'] = df['slowdelta_bandpower_total'].clip(lower=df['slowdelta_bandpower_total'].quantile(0),
                                                                        upper=df['slowdelta_bandpower_total'].quantile(0.99))


df_all = df.copy()
df_cognition = df[pd.notna(df.mmse)].copy()
print(df_all.shape)
print(df_cognition.shape)

(623, 853)
(160, 853)


In [4]:
group_names = {
    'dt_groupall': 'dt all',
    'dt_group1': 'dt within 1 year',
    'dt_group2': 'dt between 1-2.5 years',
    'dt_group3': 'dt between 2.5-7 years',

    'age_groupall': 'age all',
    'age_group1': 'age younger than 60',
    'age_group2': 'age between 60-70',
    'age_group3': 'age older than 70',
}

In [5]:
# group 1 < 1 year

for df_sel in df_all, df_cognition:
    print('___')
    df_sel['dt_groupall'] = 1
    df_sel['dt_group1'] = df_sel['dt_abs'].apply(lambda x: 1 if x < 1 else 0)
    df_sel['dt_group2'] = df_sel['dt_abs'].apply(lambda x: 1 if x >= 1 and x < 2.5 else 0)
    df_sel['dt_group3'] = df_sel['dt_abs'].apply(lambda x: 1 if x >= 2.5 else 0)
    print(df_sel[['dt_group1', 'dt_group2', 'dt_group3']].sum())

    # age groups:
    df_sel['age_groupall'] = 1
    df_sel['age_group1'] = df_sel['age'].apply(lambda x: 1 if x < 60 else 0)
    df_sel['age_group2'] = df_sel['age'].apply(lambda x: 1 if x >= 60 and x < 70 else 0)
    df_sel['age_group3'] = df_sel['age'].apply(lambda x: 1 if x >= 70 else 0)
    print(df_sel[['age_group1', 'age_group2', 'age_group3']].sum())



___
dt_group1    244
dt_group2    155
dt_group3    224
dtype: int64
age_group1    285
age_group2    187
age_group3    151
dtype: int64
___
dt_group1    75
dt_group2    44
dt_group3    41
dtype: int64
age_group1    48
age_group2    51
age_group3    61
dtype: int64


In [6]:
from decimal import Decimal

def hypothesis_prepare_sleep_mri(df_hypothesis_sleep, df_hypothesis_mri, df_covariates):
    df_hypothesis = pd.concat([df_hypothesis_sleep, df_hypothesis_mri, df_covariates], axis=1)
    print(f"N = {len(df_hypothesis)}")
    pairs = list(product(df_hypothesis_sleep.columns, df_hypothesis_mri.columns))
    
    return pairs

def hypothesis_test_routine_pairs(df, pairs, results=None):
    
    for statistical_test in ['pearsonr', 'partial_pearsonr']:
        print(f'\n{statistical_test.upper()}')
        for pair in pairs:
            # df_pair = df_hypothesis[[pair[0], pair[1], 'age', 'sex']].dropna(how='any', axis=0)
            df_pair = df[[pair[0], pair[1], 'age', 'sex']].dropna(how='any', axis=0)
            x = df_pair.iloc[:, 0]
            y = df_pair.iloc[:, 1]
            # z-standardize x and y for stability:
            x = (x - x.mean()) / x.std()
            y = (y - y.mean()) / y.std()
            age_sel = df_pair.iloc[:, 2]
            sex_sel = df_pair.iloc[:, 3]
            t, p = do_correlation(x, y, statistical_test, age_sel, sex_sel)
            print(f"{pair}, N={len(df_pair)}: {np.round(t, 3)}, {Decimal(p):.2E}")
            if results is not None:
                results.loc[str(pair), f'r {statistical_test}'] = t
                results.loc[str(pair), f'p {statistical_test}'] = p
    return results

def do_correlation(x, y, test, age_sel, sex_sel):

    if test == 'pearsonr':
        test_statistic, p_val = pearsonr(x, y)
    elif test == 'partial_pearsonr':
        df_partial = pd.DataFrame(np.array([x, y, age_sel, sex_sel]).T, columns=['x', 'y', 'age', 'sex'])
        stats = partial_corr(df_partial, x='x', y='y', covar=['age', 'sex'],  method='pearson') # , 'sex'
        test_statistic = stats['r'].item()
        p_val = stats['p-val'].item()
        
    return test_statistic, p_val


def hypothesis_prepare_cognition(df_hypothesis_sleep, df_hypothesis_mri):
    # print(f"N = {len(df_hypothesis_sleep)}")
    pairs = list(product(df_hypothesis_sleep.columns, ['mmse'])) + list(product(df_hypothesis_mri.columns, ['mmse']))
    
    return pairs

In [7]:
def run_analysis_for_selected_cohort(df_sel):
    """
    df_sel: selected cohort
    returns results dataframe
    """

    ### covariates
    age = df_sel.age.values
    sex = df_sel.sex.astype(int) # convert male/female to 1/0
    df_covariates = pd.DataFrame(np.array([age, sex]).T, columns=['age', 'sex'])
    df_covariates.index = df_sel.index
    assert all(df_sel.index == df_covariates.index)

    # print(f"N = {len(df_sel)}")

    results = pd.DataFrame()

    hypothesis_name = 'slow waves'
    df_hypothesis_sleep, df_hypothesis_mri = get_hypothesis_data(df_sel, df_sel, hypothesis_name)
    pairs = hypothesis_prepare_sleep_mri(df_hypothesis_sleep, df_hypothesis_mri, df_covariates)
    results = hypothesis_test_routine_pairs(df_sel, pairs, results)

    hypothesis_name = 'spindles'
    df_hypothesis_sleep, df_hypothesis_mri = get_hypothesis_data(df_sel, df_sel, hypothesis_name)
    pairs = hypothesis_prepare_sleep_mri(df_hypothesis_sleep, df_hypothesis_mri, df_covariates)
    results = hypothesis_test_routine_pairs(df_sel, pairs, results)

    hypothesis_name = 'rem'
    df_hypothesis_sleep, df_hypothesis_mri = get_hypothesis_data(df_sel, df_sel, hypothesis_name)
    pairs = hypothesis_prepare_sleep_mri(df_hypothesis_sleep, df_hypothesis_mri, df_covariates)
    results = hypothesis_test_routine_pairs(df_sel, pairs, results)

    hypothesis_name = 'wake'
    df_hypothesis_sleep, df_hypothesis_mri = get_hypothesis_data(df_sel, df_sel, hypothesis_name)
    pairs = hypothesis_prepare_sleep_mri(df_hypothesis_sleep, df_hypothesis_mri, df_covariates)
    results = hypothesis_test_routine_pairs(df_sel, pairs, results)

    for hypothesis_name in ['slow waves', 'spindles', 'rem', 'wake']:
        df_hypothesis_sleep, df_hypothesis_mri = get_hypothesis_data(df_sel, df_sel, hypothesis_name)
        df_hypothesis_sleep = df_hypothesis_sleep.loc[df_sel.index]
        df_hypothesis_mri = df_hypothesis_mri.loc[df_sel.index]
        df_covariates = df_covariates.loc[df_sel.index]
        
        pairs = hypothesis_prepare_cognition(df_hypothesis_sleep, df_hypothesis_mri)
        results = hypothesis_test_routine_pairs(df_sel, pairs, results)

    # add N, age statistic and sex statistic in results
    results['N'] = [len(df_sel)] * len(results)
    results['age statistic'] = [f"{np.round(df_sel['age'].mean(), 1)} ({np.round(df_sel['age'].std(), 1)})"] * len(results)
    results['% female'] = [f"{(np.round(1- df_sel['sex'].mean(), 2))}"] * len(results)

    # for all the cognition pairs, add N and age statistic
    results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())
    results.loc[results.index.str.contains('mmse'), 'age statistic'] = f"{np.round(df_sel['age'].loc[df_sel.mmse.notna()].mean(), 1)} ({np.round(df_sel['age'].loc[df_sel.mmse.notna()].std(), 1)})"
    results.loc[results.index.str.contains('mmse'), '% female'] = f"{(np.round(1- df_sel.loc[df_sel.mmse.notna(), 'sex'].mean(), 2))}"

    print(results.head(2))

    return results

In [8]:
results_collection = {}

# restrict df_sel to the intersection of dt and age groups:

for dt_sel in ['dt_groupall', 'dt_group1', 'dt_group2', 'dt_group3']:
    for age_sel in ['age_groupall', 'age_group1', 'age_group2', 'age_group3']:
        df_sel = df_all[(df_all[dt_sel] == 1) & (df_all[age_sel] == 1)]

        namestr = f"{group_names[dt_sel]}, {group_names[age_sel]}"
        print(f"\n{dt_sel}, {age_sel}")

        results = run_analysis_for_selected_cohort(df_sel)
        results.to_csv(f'./results/results_hypotheses_{namestr}.csv', index_label='pair')
        results_collection[namestr] = results


# make reuslts_collection a dataframe with multiindex.
df_multiindex = pd.concat(results_collection, axis=0, names=['group', 'feature_pair'])
feature_pairs_all = list(df_multiindex.xs(key='dt all, age all', level='group').index)


dt_groupall, age_groupall
N = 623

PEARSONR
('slowdelta_bandpower_total', 'vol-ctx--anterior'), N=623: 0.1, 1.26E-2
('slowdelta_bandpower_total', 'vol-thalamus'), N=623: 0.271, 5.54E-12
('so_rate_f', 'vol-ctx--anterior'), N=623: 0.083, 3.72E-2
('so_rate_f', 'vol-thalamus'), N=623: 0.2, 4.97E-7

PARTIAL_PEARSONR
('slowdelta_bandpower_total', 'vol-ctx--anterior'), N=623: -0.043, 2.79E-1
('slowdelta_bandpower_total', 'vol-thalamus'), N=623: -0.013, 7.53E-1
('so_rate_f', 'vol-ctx--anterior'), N=623: -0.014, 7.23E-1
('so_rate_f', 'vol-thalamus'), N=623: -0.002, 9.54E-1
N = 623

PEARSONR
('ss_dens_f', 'vol-thalamus'), N=623: 0.194, 1.01E-6
('ss_dens_f', 'vol-hippocampus'), N=623: 0.079, 4.91E-2
('fs_dens_c', 'vol-thalamus'), N=623: 0.312, 1.71E-15
('fs_dens_c', 'vol-hippocampus'), N=623: 0.174, 1.19E-5

PARTIAL_PEARSONR
('ss_dens_f', 'vol-thalamus'), N=623: 0.006, 8.77E-1
('ss_dens_f', 'vol-hippocampus'), N=623: -0.017, 6.69E-1
('fs_dens_c', 'vol-thalamus'), N=623: 0.11, 6.06E-3
('fs_dens_c

  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())
  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())


('so_rate_f', 'vol-ctx--anterior'), N=187: 0.08, 2.75E-1
('so_rate_f', 'vol-thalamus'), N=187: 0.131, 7.34E-2

PARTIAL_PEARSONR
('slowdelta_bandpower_total', 'vol-ctx--anterior'), N=187: 0.028, 7.06E-1
('slowdelta_bandpower_total', 'vol-thalamus'), N=187: 0.1, 1.77E-1
('so_rate_f', 'vol-ctx--anterior'), N=187: 0.096, 1.91E-1
('so_rate_f', 'vol-thalamus'), N=187: 0.068, 3.60E-1
N = 187

PEARSONR
('ss_dens_f', 'vol-thalamus'), N=187: 0.098, 1.84E-1
('ss_dens_f', 'vol-hippocampus'), N=187: 0.004, 9.57E-1
('fs_dens_c', 'vol-thalamus'), N=187: 0.285, 7.76E-5
('fs_dens_c', 'vol-hippocampus'), N=187: 0.114, 1.21E-1

PARTIAL_PEARSONR
('ss_dens_f', 'vol-thalamus'), N=187: 0.056, 4.51E-1
('ss_dens_f', 'vol-hippocampus'), N=187: -0.027, 7.10E-1
('fs_dens_c', 'vol-thalamus'), N=187: 0.277, 1.32E-4
('fs_dens_c', 'vol-hippocampus'), N=187: 0.103, 1.61E-1
N = 187

PEARSONR
('perc_r', 'vol-ctx--isthmuscingulate'), N=187: 0.006, 9.32E-1
('perc_r', 'vol-amygdala'), N=187: 0.051, 4.88E-1
('perc_r', 'vol-

  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())
  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())



PARTIAL_PEARSONR
('slowdelta_bandpower_total', 'vol-ctx--anterior'), N=244: -0.093, 1.49E-1
('slowdelta_bandpower_total', 'vol-thalamus'), N=244: -0.013, 8.39E-1
('so_rate_f', 'vol-ctx--anterior'), N=244: -0.034, 6.03E-1
('so_rate_f', 'vol-thalamus'), N=244: -0.017, 7.87E-1
N = 244

PEARSONR
('ss_dens_f', 'vol-thalamus'), N=244: 0.203, 1.40E-3
('ss_dens_f', 'vol-hippocampus'), N=244: 0.114, 7.52E-2
('fs_dens_c', 'vol-thalamus'), N=244: 0.321, 2.96E-7
('fs_dens_c', 'vol-hippocampus'), N=244: 0.207, 1.15E-3

PARTIAL_PEARSONR
('ss_dens_f', 'vol-thalamus'), N=244: 0.016, 8.00E-1
('ss_dens_f', 'vol-hippocampus'), N=244: 0.012, 8.52E-1
('fs_dens_c', 'vol-thalamus'), N=244: 0.064, 3.21E-1
('fs_dens_c', 'vol-hippocampus'), N=244: 0.063, 3.27E-1
N = 244

PEARSONR
('perc_r', 'vol-ctx--isthmuscingulate'), N=244: 0.035, 5.81E-1
('perc_r', 'vol-amygdala'), N=244: 0.059, 3.57E-1
('perc_r', 'vol-brain-stem'), N=244: 0.041, 5.28E-1

PARTIAL_PEARSONR
('perc_r', 'vol-ctx--isthmuscingulate'), N=244: 0.0

  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())
  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())


('perc_r', 'mmse'), N=21: 0.206, 3.98E-1
('vol-ctx--isthmuscingulate', 'mmse'), N=21: 0.121, 6.23E-1
('vol-amygdala', 'mmse'), N=21: 0.024, 9.21E-1
('vol-brain-stem', 'mmse'), N=21: -0.271, 2.62E-1

PEARSONR
('alpha_bandpower_mean_o_w', 'mmse'), N=21: 0.07, 7.63E-1
('vol-thalamus', 'mmse'), N=21: 0.012, 9.58E-1
('vol-total_ventricle', 'mmse'), N=21: -0.083, 7.22E-1

PARTIAL_PEARSONR
('alpha_bandpower_mean_o_w', 'mmse'), N=21: 0.132, 5.90E-1
('vol-thalamus', 'mmse'), N=21: 0.135, 5.81E-1
('vol-total_ventricle', 'mmse'), N=21: -0.175, 4.73E-1
                                                    r pearsonr  p pearsonr  \
('slowdelta_bandpower_total', 'vol-ctx--anterior')   -0.157810    0.096552   
('slowdelta_bandpower_total', 'vol-thalamus')         0.074264    0.436453   

                                                    r partial_pearsonr  \
('slowdelta_bandpower_total', 'vol-ctx--anterior')           -0.175566   
('slowdelta_bandpower_total', 'vol-thalamus')                -0.027250

  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())
  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())


('slowdelta_bandpower_total', 'vol-ctx--anterior'), N=155: 0.125, 1.23E-1
('slowdelta_bandpower_total', 'vol-thalamus'), N=155: 0.237, 3.04E-3
('so_rate_f', 'vol-ctx--anterior'), N=155: 0.109, 1.75E-1
('so_rate_f', 'vol-thalamus'), N=155: 0.143, 7.56E-2

PARTIAL_PEARSONR
('slowdelta_bandpower_total', 'vol-ctx--anterior'), N=155: -0.012, 8.84E-1
('slowdelta_bandpower_total', 'vol-thalamus'), N=155: -0.064, 4.33E-1
('so_rate_f', 'vol-ctx--anterior'), N=155: 0.021, 7.98E-1
('so_rate_f', 'vol-thalamus'), N=155: -0.037, 6.49E-1
N = 155

PEARSONR
('ss_dens_f', 'vol-thalamus'), N=155: 0.228, 4.31E-3
('ss_dens_f', 'vol-hippocampus'), N=155: 0.015, 8.56E-1
('fs_dens_c', 'vol-thalamus'), N=155: 0.319, 5.35E-5
('fs_dens_c', 'vol-hippocampus'), N=155: 0.051, 5.25E-1

PARTIAL_PEARSONR
('ss_dens_f', 'vol-thalamus'), N=155: 0.013, 8.72E-1
('ss_dens_f', 'vol-hippocampus'), N=155: -0.083, 3.09E-1
('fs_dens_c', 'vol-thalamus'), N=155: 0.155, 5.51E-2
('fs_dens_c', 'vol-hippocampus'), N=155: -0.031, 7.02E

  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())
  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())


('slowdelta_bandpower_total', 'mmse'), N=15: 0.428, 1.45E-1
('so_rate_f', 'mmse'), N=15: 0.002, 9.95E-1
('vol-ctx--anterior', 'mmse'), N=15: -0.338, 2.59E-1
('vol-thalamus', 'mmse'), N=15: 0.165, 5.90E-1

PEARSONR
('ss_dens_f', 'mmse'), N=15: -0.452, 9.09E-2
('fs_dens_c', 'mmse'), N=15: -0.076, 7.87E-1
('vol-thalamus', 'mmse'), N=15: -0.044, 8.77E-1
('vol-hippocampus', 'mmse'), N=15: 0.017, 9.51E-1

PARTIAL_PEARSONR
('ss_dens_f', 'mmse'), N=15: -0.435, 1.37E-1
('fs_dens_c', 'mmse'), N=15: -0.064, 8.37E-1
('vol-thalamus', 'mmse'), N=15: 0.165, 5.90E-1
('vol-hippocampus', 'mmse'), N=15: 0.046, 8.81E-1

PEARSONR
('perc_r', 'mmse'), N=15: 0.143, 6.12E-1
('vol-ctx--isthmuscingulate', 'mmse'), N=15: 0.221, 4.28E-1
('vol-amygdala', 'mmse'), N=15: 0.155, 5.80E-1
('vol-brain-stem', 'mmse'), N=15: -0.101, 7.19E-1

PARTIAL_PEARSONR
('perc_r', 'mmse'), N=15: 0.099, 7.48E-1
('vol-ctx--isthmuscingulate', 'mmse'), N=15: 0.262, 3.87E-1
('vol-amygdala', 'mmse'), N=15: 0.199, 5.14E-1
('vol-brain-stem', 

  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())
  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())


('slowdelta_bandpower_total', 'vol-ctx--anterior'), N=224: -0.003, 9.66E-1
('slowdelta_bandpower_total', 'vol-thalamus'), N=224: 0.011, 8.65E-1
('so_rate_f', 'vol-ctx--anterior'), N=224: -0.019, 7.78E-1
('so_rate_f', 'vol-thalamus'), N=224: 0.044, 5.18E-1
N = 224

PEARSONR
('ss_dens_f', 'vol-thalamus'), N=224: 0.169, 1.14E-2
('ss_dens_f', 'vol-hippocampus'), N=224: 0.085, 2.03E-1
('fs_dens_c', 'vol-thalamus'), N=224: 0.292, 8.58E-6
('fs_dens_c', 'vol-hippocampus'), N=224: 0.2, 2.59E-3

PARTIAL_PEARSONR
('ss_dens_f', 'vol-thalamus'), N=224: -0.01, 8.81E-1
('ss_dens_f', 'vol-hippocampus'), N=224: -0.003, 9.61E-1
('fs_dens_c', 'vol-thalamus'), N=224: 0.129, 5.52E-2
('fs_dens_c', 'vol-hippocampus'), N=224: 0.112, 9.74E-2
N = 224

PEARSONR
('perc_r', 'vol-ctx--isthmuscingulate'), N=224: -0.027, 6.91E-1
('perc_r', 'vol-amygdala'), N=224: 0.036, 5.89E-1
('perc_r', 'vol-brain-stem'), N=224: 0.014, 8.29E-1

PARTIAL_PEARSONR
('perc_r', 'vol-ctx--isthmuscingulate'), N=224: -0.046, 4.93E-1
('perc_

  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())
  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())


('perc_r', 'vol-ctx--isthmuscingulate'), N=107: -0.064, 5.17E-1
('perc_r', 'vol-amygdala'), N=107: 0.004, 9.69E-1
('perc_r', 'vol-brain-stem'), N=107: 0.066, 5.02E-1
N = 107

PEARSONR
('alpha_bandpower_mean_o_w', 'vol-thalamus'), N=107: 0.085, 3.86E-1
('alpha_bandpower_mean_o_w', 'vol-total_ventricle'), N=107: -0.016, 8.73E-1

PARTIAL_PEARSONR
('alpha_bandpower_mean_o_w', 'vol-thalamus'), N=107: 0.056, 5.68E-1
('alpha_bandpower_mean_o_w', 'vol-total_ventricle'), N=107: 0.006, 9.52E-1

PEARSONR
('slowdelta_bandpower_total', 'mmse'), N=12: 0.048, 8.81E-1
('so_rate_f', 'mmse'), N=12: -0.006, 9.84E-1
('vol-ctx--anterior', 'mmse'), N=12: 0.25, 4.34E-1
('vol-thalamus', 'mmse'), N=12: 0.269, 3.98E-1

PARTIAL_PEARSONR
('slowdelta_bandpower_total', 'mmse'), N=12: 0.098, 7.88E-1
('so_rate_f', 'mmse'), N=12: 0.022, 9.53E-1
('vol-ctx--anterior', 'mmse'), N=12: 0.078, 8.31E-1
('vol-thalamus', 'mmse'), N=12: 0.248, 4.89E-1

PEARSONR
('ss_dens_f', 'mmse'), N=12: 0.163, 6.12E-1
('fs_dens_c', 'mmse'), 

  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())
  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())


In [9]:
results.head()

Unnamed: 0,r pearsonr,p pearsonr,r partial_pearsonr,p partial_pearsonr,N,age statistic,% female
"('slowdelta_bandpower_total', 'vol-ctx--anterior')",0.005645,0.968316,0.023748,0.869967,52,76.3 (4.7),0.42
"('slowdelta_bandpower_total', 'vol-thalamus')",0.093398,0.510172,-0.024266,0.867158,52,76.3 (4.7),0.42
"('so_rate_f', 'vol-ctx--anterior')",-0.039077,0.78328,-0.021372,0.882879,52,76.3 (4.7),0.42
"('so_rate_f', 'vol-thalamus')",0.149646,0.289663,0.07141,0.62215,52,76.3 (4.7),0.42
"('ss_dens_f', 'vol-thalamus')",0.249759,0.074152,0.176189,0.220977,52,76.3 (4.7),0.42


In [10]:
feature_pair = str(('slowdelta_bandpower_total', 'vol-ctx--anterior'))

df_feature_pair = df_multiindex.xs(key=feature_pair, level='feature_pair').copy()
df_feature_pair.index.name = feature_pair
# check how many of the "p partial_pearsonr" are significant:
df_feature_pair.loc['percentage significant', 'p partial_pearsonr'] = np.round(df_feature_pair['p partial_pearsonr'].apply(lambda x: float(x) < 0.05).sum() / len(df_feature_pair), 2)
df_feature_pair


Unnamed: 0_level_0,r pearsonr,p pearsonr,r partial_pearsonr,p partial_pearsonr,N,age statistic,% female
"('slowdelta_bandpower_total', 'vol-ctx--anterior')",Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"dt all, age all",0.099891,0.012613,-0.043495,0.279159,623.0,60.9 (12.5),0.55
"dt all, age younger than 60",-0.014984,0.801148,-0.08369,0.160284,285.0,49.9 (7.9),0.61
"dt all, age between 60-70",0.000342,0.996291,0.027963,0.705553,187.0,65.1 (2.9),0.58
"dt all, age older than 70",0.076077,0.353186,0.059654,0.469877,151.0,76.6 (4.6),0.39
"dt within 1 year, age all",0.02006,0.755211,-0.093007,0.149166,244.0,60.8 (12.8),0.57
"dt within 1 year, age younger than 60",-0.15781,0.096552,-0.175566,0.066567,112.0,49.4 (7.4),0.62
"dt within 1 year, age between 60-70",0.02805,0.815066,0.09384,0.439703,72.0,65.2 (2.9),0.57
"dt within 1 year, age older than 70",0.116848,0.373941,0.103567,0.439135,60.0,76.9 (4.8),0.45
"dt between 1-2.5 years, age all",0.12452,0.122653,-0.011931,0.883629,155.0,62.1 (11.6),0.47
"dt between 1-2.5 years, age younger than 60",0.273256,0.026421,0.198934,0.115049,66.0,51.2 (7.2),0.55


In [11]:
df_multiindex.xs(key='dt all, age all', level='group').head(3)


Unnamed: 0_level_0,r pearsonr,p pearsonr,r partial_pearsonr,p partial_pearsonr,N,age statistic,% female
feature_pair,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"('slowdelta_bandpower_total', 'vol-ctx--anterior')",0.099891,0.01261336,-0.043495,0.279159,623,60.9 (12.5),0.55
"('slowdelta_bandpower_total', 'vol-thalamus')",0.271429,5.539088e-12,-0.012671,0.752656,623,60.9 (12.5),0.55
"('so_rate_f', 'vol-ctx--anterior')",0.083479,0.03724356,-0.01425,0.723024,623,60.9 (12.5),0.55


In [12]:
feature_pairs_all

["('slowdelta_bandpower_total', 'vol-ctx--anterior')",
 "('slowdelta_bandpower_total', 'vol-thalamus')",
 "('so_rate_f', 'vol-ctx--anterior')",
 "('so_rate_f', 'vol-thalamus')",
 "('ss_dens_f', 'vol-thalamus')",
 "('ss_dens_f', 'vol-hippocampus')",
 "('fs_dens_c', 'vol-thalamus')",
 "('fs_dens_c', 'vol-hippocampus')",
 "('perc_r', 'vol-ctx--isthmuscingulate')",
 "('perc_r', 'vol-amygdala')",
 "('perc_r', 'vol-brain-stem')",
 "('alpha_bandpower_mean_o_w', 'vol-thalamus')",
 "('alpha_bandpower_mean_o_w', 'vol-total_ventricle')",
 "('slowdelta_bandpower_total', 'mmse')",
 "('so_rate_f', 'mmse')",
 "('vol-ctx--anterior', 'mmse')",
 "('vol-thalamus', 'mmse')",
 "('ss_dens_f', 'mmse')",
 "('fs_dens_c', 'mmse')",
 "('vol-hippocampus', 'mmse')",
 "('perc_r', 'mmse')",
 "('vol-ctx--isthmuscingulate', 'mmse')",
 "('vol-amygdala', 'mmse')",
 "('vol-brain-stem', 'mmse')",
 "('alpha_bandpower_mean_o_w', 'mmse')",
 "('vol-total_ventricle', 'mmse')"]

In [13]:
# reverse the order of the index levels:
df_multiindex = df_multiindex.swaplevel(0, 1, axis=0).loc[feature_pairs_all]
# df_multiindex = df_multiindex.sort_index(axis=0, level=0)

# add a row of NaNs when there's a change of "feature_pair"
for feature_pair in feature_pairs_all:
    df_multiindex.loc[(feature_pair, 'zzz_blank'), :] = np.nan
df_multiindex = df_multiindex.loc[feature_pairs_all]
df_multiindex.to_csv(f'./results/results_hypotheses_sensitivity.csv', index_label='pair')


In [14]:
df_multiindex

Unnamed: 0_level_0,Unnamed: 1_level_0,r pearsonr,p pearsonr,r partial_pearsonr,p partial_pearsonr,N,age statistic,% female
feature_pair,group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"('slowdelta_bandpower_total', 'vol-ctx--anterior')","dt all, age all",0.099891,0.012613,-0.043495,0.279159,623,60.9 (12.5),0.55
"('slowdelta_bandpower_total', 'vol-ctx--anterior')","dt all, age younger than 60",-0.014984,0.801148,-0.083690,0.160284,285,49.9 (7.9),0.61
"('slowdelta_bandpower_total', 'vol-ctx--anterior')","dt all, age between 60-70",0.000342,0.996291,0.027963,0.705553,187,65.1 (2.9),0.58
"('slowdelta_bandpower_total', 'vol-ctx--anterior')","dt all, age older than 70",0.076077,0.353186,0.059654,0.469877,151,76.6 (4.6),0.39
"('slowdelta_bandpower_total', 'vol-ctx--anterior')","dt within 1 year, age all",0.020060,0.755211,-0.093007,0.149166,244,60.8 (12.8),0.57
...,...,...,...,...,...,...,...,...
"('vol-total_ventricle', 'mmse')","dt between 2.5-7 years, age all",-0.234735,0.139599,-0.165474,0.314066,41,65.8 (10.5),0.44
"('vol-total_ventricle', 'mmse')","dt between 2.5-7 years, age younger than 60",-0.514033,0.087333,-0.451228,0.190550,12,52.7 (5.7),0.58
"('vol-total_ventricle', 'mmse')","dt between 2.5-7 years, age between 60-70",-0.383059,0.196378,-0.482803,0.132527,13,65.5 (2.4),0.54
"('vol-total_ventricle', 'mmse')","dt between 2.5-7 years, age older than 70",0.067001,0.805263,-0.004093,0.988922,16,75.9 (4.7),0.25


In [15]:
df.index

Index(['sid00', 'sid01', 'sid02', 'sid03', 'sid04', 'sid05', 'sid06', 'sid07',
       'sid08', 'sid09',
       ...
       'sid613', 'sid614', 'sid615', 'sid616', 'sid617', 'sid618', 'sid619',
       'sid620', 'sid621', 'sid622'],
      dtype='object', name='sid', length=623)

In [16]:
table1 = pd.read_csv(os.path.join('table1_information_deidentified.csv'), index_col=0)

assert np.isin(df.index, table1.index).all()


In [17]:
table1['dx_strata_cci<2'] = (table1['cci_score'] < 2).astype(int)
table1['dx_strata_cci2-4'] = ((table1['cci_score'] >= 2) & (table1['cci_score'] < 4)).astype(int)
table1['dx_strata_cci>=5'] = (table1['cci_score'] >= 5).astype(int)

# print N's
print(f"N = {len(table1)}")
print(f"N CCI<2 = {len(table1[table1['dx_strata_cci<2'] == 1])}")
print(f"N CCI 2-4 = {len(table1[table1['dx_strata_cci2-4'] == 1])}")
print(f"N CCI>=5 = {len(table1[table1['dx_strata_cci>=5'] == 1])}")

table1['dx_mci_dementia'] = table1['dx_dementia_cat'].apply(lambda x: 1 if x in ['mci', 'dementia'] else 0)
table1['dx_mci'] = table1['dx_dementia_cat'].apply(lambda x: 1 if x in ['mci'] else 0)
table1['dx_dementia'] = table1['dx_dementia_cat'].apply(lambda x: 1 if x in ['dementia'] else 0)

print(f"N MCI or dementia = {len(table1[table1['dx_mci_dementia'] == 1])}")
print(f"N MCI = {len(table1[table1['dx_mci'] == 1])}")
print(f"N dementia = {len(table1[table1['dx_dementia'] == 1])}")

disease_cats = ['dx_elix_depre', 'dx_cci_canc', 'dx_cci_diab', 'dx_cci_chf', 'dx_cci_pvd', 'dx_cci_cevd']

# make new disease categories for those diseases where people with mci or dementia are excluded:
for disease_cat in disease_cats:
    table1[f'{disease_cat}_no_mci_dementia'] = table1[disease_cat] * (1 - table1['dx_mci_dementia'])
    table1[f'{disease_cat}_no_mci'] = table1[disease_cat] * (1 - table1['dx_mci'])
    table1[f'{disease_cat}_no_dementia'] = table1[disease_cat] * (1 - table1['dx_dementia'])

    # print N's
    print(f"N {disease_cat}_no_mci_dementia = {len(table1[table1[f'{disease_cat}_no_mci_dementia'] == 1])}")

# and a category where none of the above diseases are present: # also no dementia or mci:
table1['dx_no_disease'] = table1[disease_cats + ['dx_mci_dementia']].sum(axis=1) == 0
print(f"N dx_no_disease = {len(table1[table1['dx_no_disease'] == 1])}")
# no disease and CCI < 2:
table1['dx_no_disease_cci<2'] = table1['dx_no_disease'] * table1['dx_strata_cci<2']
print(f"N dx_no_disease_cci<2 = {len(table1[table1['dx_no_disease_cci<2'] == 1])}")


N = 623
N CCI<2 = 240
N CCI 2-4 = 201
N CCI>=5 = 82
N MCI or dementia = 107
N MCI = 71
N dementia = 36
N dx_elix_depre_no_mci_dementia = 262
N dx_cci_canc_no_mci_dementia = 127
N dx_cci_diab_no_mci_dementia = 120
N dx_cci_chf_no_mci_dementia = 78
N dx_cci_pvd_no_mci_dementia = 87
N dx_cci_cevd_no_mci_dementia = 136
N dx_no_disease = 131
N dx_no_disease_cci<2 = 86


In [18]:
def rename(df_multiindex):

    # Create a list to store the modified levels
    new_levels = []

    # Loop through each level in the MultiIndex
    for level in df_multiindex.index.levels:
        # Apply the string replacement operation and append to new_levels
        new_levels.append(level.astype(str).str.replace('dx_strata_cci', 'CCI'))
        new_levels.append(level.astype(str).str.replace('dx_no_disease', 'No disease'))
        new_levels.append(level.astype(str).str.replace('dx_no_disease_cci<2', 'No disease, CCI<2'))
        new_levels.append(level.astype(str).str.replace('dx_elix_depre', 'Depression'))
        new_levels.append(level.astype(str).str.replace('dx_cci_canc', 'Cancer'))
        new_levels.append(level.astype(str).str.replace('dx_cci_diab', 'Diabetes'))
        new_levels.append(level.astype(str).str.replace('dx_cci_chf', 'Congestive heart failure'))
        new_levels.append(level.astype(str).str.replace('dx_cci_pvd', 'Peripheral vascular disease'))
        new_levels.append(level.astype(str).str.replace('dx_cci_cevd', 'Cerebrovascular disease'))
        new_levels.append(level.astype(str).str.replace('dx_mci', 'MCI'))
        new_levels.append(level.astype(str).str.replace('dx_dementia', 'Dementia'))

        new_levels.append(level.astype(str).str.replace('slowdelta_bandpower_total', 'Total slow and delta power'))
        new_levels.append(level.astype(str).str.replace('so_rate_f', 'SO rate frontal'))
        new_levels.append(level.astype(str).str.replace('ss_dens_f', 'Slow spindles density frontal'))
        new_levels.append(level.astype(str).str.replace('fs_dens_c', 'Fast spindles density central'))
        new_levels.append(level.astype(str).str.replace('perc_r', 'Percentage R'))
        new_levels.append(level.astype(str).str.replace('alpha_bandpower_mean_o_w', 'Mean alpha during W'))
        new_levels.append(level.astype(str).str.replace('vol-ctx--anterior', 'Anterior cortex'))
        new_levels.append(level.astype(str).str.replace('vol-thalamus', 'Thalamus'))
        new_levels.append(level.astype(str).str.replace('vol-hippocampus', 'Hippocampus'))
        new_levels.append(level.astype(str).str.replace('vol-amygdala', 'Amygdala'))
        new_levels.append(level.astype(str).str.replace('vol-brain-stem', 'Brainstem'))
        new_levels.append(level.astype(str).str.replace('vol-ctx--isthmuscingulate', 'Isthmuscingulate'))
        new_levels.append(level.astype(str).str.replace('vol-total_ventricle', 'Total ventricle volume'))
        new_levels.append(level.astype(str).str.replace('mmse', 'MMSE'))

    # Reconstruct the MultiIndex using the modified levels
    df_multiindex.index = pd.MultiIndex.from_arrays(new_levels, names=['group', 'feature_pair'])

    return df_multiindex


In [19]:
# SAME CODE AS ABOVE BUT FOR DISEASE GROUPS

disease_cats = ['dx_dementia', 'dx_mci']
disease_cats += [x + '_no_mci_dementia' for x in ['dx_elix_depre', 'dx_cci_canc', 'dx_cci_diab', 'dx_cci_chf', 'dx_cci_pvd', 'dx_cci_cevd']]
disease_cats += ['dx_strata_cci<2', 'dx_strata_cci2-4', 'dx_strata_cci>=5']
disease_cats += ['dx_no_disease', 'dx_no_disease_cci<2']

# add those disease categories to the df_all:
for disease_cat in disease_cats:
    df_all[disease_cat] = table1[disease_cat].loc[df_all.index]

results_collection = {}

# restrict df_sel to the intersection of dt and age groups:

for disease_cat in disease_cats:
    df_sel = df_all[df_all[disease_cat] == 1]

    namestr = disease_cat

    results = run_analysis_for_selected_cohort(df_sel)
    results.to_csv(f'./results/results_hypotheses_{namestr}.csv', index_label='pair')
    results_collection[namestr] = results

# make reuslts_collection a dataframe with multiindex.
df_multiindex = pd.concat(results_collection, axis=0, names=['group', 'feature_pair'])
feature_pairs_all = list(df_multiindex.xs(key='dx_mci', level='group').index)

# reverse the order of the index levels:
df_multiindex = df_multiindex.swaplevel(0, 1, axis=0).loc[feature_pairs_all]
df_multiindex = df_multiindex.sort_index(axis=0, level=0)
# sort the feature_pair value according to the disease_cats list:
df_multiindex = df_multiindex.reindex(disease_cats, level=1)

# add a row of NaNs when there's a change of "feature_pair"
for feature_pair in feature_pairs_all:
    df_multiindex.loc[(feature_pair, 'zzz_blank'), :] = np.nan
df_multiindex = df_multiindex.loc[feature_pairs_all]

# df_multiindex = rename(df_multiindex)

df_multiindex.to_csv(f'./results/results_hypotheses_sensitivity_diseases.csv', index_label='pair')

N = 36

PEARSONR
('slowdelta_bandpower_total', 'vol-ctx--anterior'), N=36: 0.311, 6.50E-2
('slowdelta_bandpower_total', 'vol-thalamus'), N=36: 0.343, 4.03E-2
('so_rate_f', 'vol-ctx--anterior'), N=36: 0.134, 4.34E-1
('so_rate_f', 'vol-thalamus'), N=36: 0.158, 3.57E-1

PARTIAL_PEARSONR
('slowdelta_bandpower_total', 'vol-ctx--anterior'), N=36: 0.268, 1.25E-1
('slowdelta_bandpower_total', 'vol-thalamus'), N=36: 0.063, 7.23E-1
('so_rate_f', 'vol-ctx--anterior'), N=36: 0.128, 4.70E-1
('so_rate_f', 'vol-thalamus'), N=36: -0.078, 6.59E-1
N = 36

PEARSONR
('ss_dens_f', 'vol-thalamus'), N=36: 0.361, 3.05E-2
('ss_dens_f', 'vol-hippocampus'), N=36: 0.32, 5.69E-2
('fs_dens_c', 'vol-thalamus'), N=36: 0.238, 1.61E-1
('fs_dens_c', 'vol-hippocampus'), N=36: 0.25, 1.41E-1

PARTIAL_PEARSONR
('ss_dens_f', 'vol-thalamus'), N=36: 0.248, 1.58E-1
('ss_dens_f', 'vol-hippocampus'), N=36: 0.199, 2.59E-1
('fs_dens_c', 'vol-thalamus'), N=36: 0.187, 2.89E-1
('fs_dens_c', 'vol-hippocampus'), N=36: 0.208, 2.37E-1
N =

  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())
  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())


('ss_dens_f', 'vol-thalamus'), N=262: -0.029, 6.41E-1
('ss_dens_f', 'vol-hippocampus'), N=262: -0.054, 3.84E-1
('fs_dens_c', 'vol-thalamus'), N=262: 0.117, 5.90E-2
('fs_dens_c', 'vol-hippocampus'), N=262: 0.025, 6.88E-1
N = 262

PEARSONR
('perc_r', 'vol-ctx--isthmuscingulate'), N=262: 0.116, 6.17E-2
('perc_r', 'vol-amygdala'), N=262: 0.014, 8.24E-1
('perc_r', 'vol-brain-stem'), N=262: -0.049, 4.27E-1

PARTIAL_PEARSONR
('perc_r', 'vol-ctx--isthmuscingulate'), N=262: 0.109, 7.86E-2
('perc_r', 'vol-amygdala'), N=262: -0.003, 9.65E-1
('perc_r', 'vol-brain-stem'), N=262: -0.027, 6.67E-1
N = 262

PEARSONR
('alpha_bandpower_mean_o_w', 'vol-thalamus'), N=262: 0.015, 8.09E-1
('alpha_bandpower_mean_o_w', 'vol-total_ventricle'), N=262: 0.139, 2.43E-2

PARTIAL_PEARSONR
('alpha_bandpower_mean_o_w', 'vol-thalamus'), N=262: 0.048, 4.43E-1
('alpha_bandpower_mean_o_w', 'vol-total_ventricle'), N=262: 0.137, 2.71E-2

PEARSONR
('slowdelta_bandpower_total', 'mmse'), N=54: 0.016, 9.06E-1
('so_rate_f', 'mmse

  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())
  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())


('ss_dens_f', 'vol-thalamus'), N=120: 0.067, 4.74E-1
('ss_dens_f', 'vol-hippocampus'), N=120: -0.039, 6.75E-1
('fs_dens_c', 'vol-thalamus'), N=120: 0.18, 5.07E-2
('fs_dens_c', 'vol-hippocampus'), N=120: 0.174, 6.00E-2
N = 120

PEARSONR
('perc_r', 'vol-ctx--isthmuscingulate'), N=120: 0.086, 3.51E-1
('perc_r', 'vol-amygdala'), N=120: -0.042, 6.50E-1
('perc_r', 'vol-brain-stem'), N=120: -0.177, 5.34E-2

PARTIAL_PEARSONR
('perc_r', 'vol-ctx--isthmuscingulate'), N=120: 0.068, 4.62E-1
('perc_r', 'vol-amygdala'), N=120: -0.048, 6.08E-1
('perc_r', 'vol-brain-stem'), N=120: -0.153, 9.80E-2
N = 120

PEARSONR
('alpha_bandpower_mean_o_w', 'vol-thalamus'), N=120: -0.049, 5.97E-1
('alpha_bandpower_mean_o_w', 'vol-total_ventricle'), N=120: 0.139, 1.31E-1

PARTIAL_PEARSONR
('alpha_bandpower_mean_o_w', 'vol-thalamus'), N=120: -0.015, 8.75E-1
('alpha_bandpower_mean_o_w', 'vol-total_ventricle'), N=120: 0.122, 1.89E-1

PEARSONR
('slowdelta_bandpower_total', 'mmse'), N=14: 0.116, 6.94E-1
('so_rate_f', 'mms

  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())
  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())



PARTIAL_PEARSONR
('slowdelta_bandpower_total', 'vol-ctx--anterior'), N=87: -0.126, 2.50E-1
('slowdelta_bandpower_total', 'vol-thalamus'), N=87: -0.071, 5.19E-1
('so_rate_f', 'vol-ctx--anterior'), N=87: -0.154, 1.60E-1
('so_rate_f', 'vol-thalamus'), N=87: -0.106, 3.36E-1
N = 87

PEARSONR
('ss_dens_f', 'vol-thalamus'), N=87: 0.328, 1.90E-3
('ss_dens_f', 'vol-hippocampus'), N=87: -0.026, 8.10E-1
('fs_dens_c', 'vol-thalamus'), N=87: 0.275, 9.86E-3
('fs_dens_c', 'vol-hippocampus'), N=87: 0.004, 9.71E-1

PARTIAL_PEARSONR
('ss_dens_f', 'vol-thalamus'), N=87: 0.172, 1.15E-1
('ss_dens_f', 'vol-hippocampus'), N=87: -0.138, 2.08E-1
('fs_dens_c', 'vol-thalamus'), N=87: 0.171, 1.18E-1
('fs_dens_c', 'vol-hippocampus'), N=87: -0.074, 4.99E-1
N = 87

PEARSONR
('perc_r', 'vol-ctx--isthmuscingulate'), N=87: 0.1, 3.57E-1
('perc_r', 'vol-amygdala'), N=87: -0.011, 9.19E-1
('perc_r', 'vol-brain-stem'), N=87: -0.002, 9.82E-1

PARTIAL_PEARSONR
('perc_r', 'vol-ctx--isthmuscingulate'), N=87: 0.08, 4.68E-1
('pe

  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())
  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())


('so_rate_f', 'vol-ctx--anterior'), N=240: 0.078, 2.26E-1
('so_rate_f', 'vol-thalamus'), N=240: 0.204, 1.45E-3

PARTIAL_PEARSONR
('slowdelta_bandpower_total', 'vol-ctx--anterior'), N=240: 0.029, 6.61E-1
('slowdelta_bandpower_total', 'vol-thalamus'), N=240: -0.019, 7.75E-1
('so_rate_f', 'vol-ctx--anterior'), N=240: 0.006, 9.28E-1
('so_rate_f', 'vol-thalamus'), N=240: 0.024, 7.10E-1
N = 240

PEARSONR
('ss_dens_f', 'vol-thalamus'), N=240: 0.142, 2.80E-2
('ss_dens_f', 'vol-hippocampus'), N=240: 0.085, 1.90E-1
('fs_dens_c', 'vol-thalamus'), N=240: 0.328, 1.98E-7
('fs_dens_c', 'vol-hippocampus'), N=240: 0.16, 1.31E-2

PARTIAL_PEARSONR
('ss_dens_f', 'vol-thalamus'), N=240: 0.013, 8.38E-1
('ss_dens_f', 'vol-hippocampus'), N=240: 0.009, 8.93E-1
('fs_dens_c', 'vol-thalamus'), N=240: 0.149, 2.12E-2
('fs_dens_c', 'vol-hippocampus'), N=240: 0.046, 4.82E-1
N = 240

PEARSONR
('perc_r', 'vol-ctx--isthmuscingulate'), N=240: -0.035, 5.87E-1
('perc_r', 'vol-amygdala'), N=240: 0.059, 3.63E-1
('perc_r', 'v

  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())
  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())


('perc_r', 'mmse'), N=57: 0.233, 8.66E-2
('vol-ctx--isthmuscingulate', 'mmse'), N=57: 0.073, 5.97E-1
('vol-amygdala', 'mmse'), N=57: 0.259, 5.64E-2
('vol-brain-stem', 'mmse'), N=57: 0.033, 8.10E-1

PEARSONR
('alpha_bandpower_mean_o_w', 'mmse'), N=57: 0.187, 1.63E-1
('vol-thalamus', 'mmse'), N=57: 0.314, 1.75E-2
('vol-total_ventricle', 'mmse'), N=57: -0.127, 3.47E-1

PARTIAL_PEARSONR
('alpha_bandpower_mean_o_w', 'mmse'), N=57: 0.247, 6.86E-2
('vol-thalamus', 'mmse'), N=57: 0.134, 3.29E-1
('vol-total_ventricle', 'mmse'), N=57: -0.027, 8.46E-1
                                                    r pearsonr    p pearsonr  \
('slowdelta_bandpower_total', 'vol-ctx--anterior')    0.112056  1.132501e-01   
('slowdelta_bandpower_total', 'vol-thalamus')         0.361945  1.294989e-07   

                                                    r partial_pearsonr  \
('slowdelta_bandpower_total', 'vol-ctx--anterior')           -0.067708   
('slowdelta_bandpower_total', 'vol-thalamus')                -0.

  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())
  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())



PEARSONR
('perc_r', 'mmse'), N=15: -0.121, 6.67E-1
('vol-ctx--isthmuscingulate', 'mmse'), N=15: 0.224, 4.23E-1
('vol-amygdala', 'mmse'), N=15: 0.373, 1.70E-1
('vol-brain-stem', 'mmse'), N=15: 0.172, 5.40E-1

PARTIAL_PEARSONR
('perc_r', 'mmse'), N=15: -0.203, 5.06E-1
('vol-ctx--isthmuscingulate', 'mmse'), N=15: 0.301, 3.18E-1
('vol-amygdala', 'mmse'), N=15: 0.412, 1.61E-1
('vol-brain-stem', 'mmse'), N=15: 0.183, 5.50E-1

PEARSONR
('alpha_bandpower_mean_o_w', 'mmse'), N=15: -0.376, 1.67E-1
('vol-thalamus', 'mmse'), N=15: 0.043, 8.80E-1
('vol-total_ventricle', 'mmse'), N=15: -0.341, 2.14E-1

PARTIAL_PEARSONR
('alpha_bandpower_mean_o_w', 'mmse'), N=15: -0.344, 2.50E-1
('vol-thalamus', 'mmse'), N=15: 0.121, 6.94E-1
('vol-total_ventricle', 'mmse'), N=15: -0.416, 1.58E-1
                                                    r pearsonr  p pearsonr  \
('slowdelta_bandpower_total', 'vol-ctx--anterior')    0.112282    0.201656   
('slowdelta_bandpower_total', 'vol-thalamus')         0.188443    0.

  results.loc[results.index.str.contains('mmse'), 'N'] = str(df_sel.mmse.notna().sum())


In [20]:
df_multiindex.xs(key='dx_mci', level='group').head(2)

Unnamed: 0_level_0,r pearsonr,p pearsonr,r partial_pearsonr,p partial_pearsonr,N,age statistic,% female
feature_pair,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"('slowdelta_bandpower_total', 'vol-ctx--anterior')",-0.020299,0.866566,-0.176919,0.145876,71,68.0 (9.7),0.48
"('slowdelta_bandpower_total', 'vol-thalamus')",0.225445,0.058714,0.003744,0.975643,71,68.0 (9.7),0.48


In [21]:
df_multiindex.xs(key="('vol-total_ventricle', 'mmse')", level='feature_pair')

Unnamed: 0_level_0,r pearsonr,p pearsonr,r partial_pearsonr,p partial_pearsonr,N,age statistic,% female
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
dx_dementia,-0.037253,0.865991,0.007995,0.972562,23.0,71.8 (12.2),0.57
dx_mci,-0.198915,0.14542,-0.156674,0.262568,55.0,68.0 (10.1),0.49
dx_elix_depre_no_mci_dementia,-0.002292,0.986875,0.000872,0.995102,54.0,61.7 (10.6),0.46
dx_cci_canc_no_mci_dementia,0.187745,0.402775,0.251612,0.284564,22.0,67.0 (9.3),0.32
dx_cci_diab_no_mci_dementia,0.286783,0.320179,0.424514,0.16898,14.0,63.0 (8.1),0.21
dx_cci_chf_no_mci_dementia,-0.076693,0.822663,0.036255,0.926223,11.0,69.1 (10.6),0.36
dx_cci_pvd_no_mci_dementia,-0.496823,0.084138,-0.313166,0.348384,13.0,67.1 (9.2),0.15
dx_cci_cevd_no_mci_dementia,0.132045,0.548114,0.329002,0.145321,23.0,67.2 (10.6),0.39
dx_strata_cci<2,-0.243677,0.058432,-0.134123,0.311176,61.0,62.3 (11.9),0.41
dx_strata_cci2-4,-0.12675,0.347456,-0.026745,0.846316,57.0,66.8 (10.7),0.49


In [22]:
df_multiindex.xs(key="('fs_dens_c', 'vol-thalamus')", level='feature_pair')

Unnamed: 0_level_0,r pearsonr,p pearsonr,r partial_pearsonr,p partial_pearsonr,N,age statistic,% female
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
dx_dementia,0.238382,0.1614862,0.187073,0.289418,36.0,73.3 (11.2),0.58
dx_mci,0.355451,0.002350669,0.091589,0.454177,71.0,68.0 (9.7),0.48
dx_elix_depre_no_mci_dementia,0.319198,1.28656e-07,0.117272,0.058977,262.0,57.8 (11.9),0.66
dx_cci_canc_no_mci_dementia,0.259723,0.003190427,0.191764,0.032164,127.0,64.5 (10.4),0.46
dx_cci_diab_no_mci_dementia,0.275949,0.002283128,0.180344,0.050674,120.0,60.6 (10.4),0.53
dx_cci_chf_no_mci_dementia,0.162995,0.1539126,0.012666,0.913525,78.0,65.4 (11.1),0.5
dx_cci_pvd_no_mci_dementia,0.275277,0.009864722,0.170655,0.118396,87.0,64.7 (10.9),0.47
dx_cci_cevd_no_mci_dementia,0.301449,0.0003619813,0.091055,0.295403,136.0,63.1 (10.8),0.52
dx_strata_cci<2,0.328103,1.981122e-07,0.149371,0.021155,240.0,56.9 (11.9),0.55
dx_strata_cci2-4,0.29013,2.942582e-05,0.078841,0.268337,201.0,61.6 (12.6),0.59


In [23]:
results

Unnamed: 0,r pearsonr,p pearsonr,r partial_pearsonr,p partial_pearsonr,N,age statistic,% female
"('slowdelta_bandpower_total', 'vol-ctx--anterior')",0.10813,0.321691,0.057029,0.606364,86,55.1 (12.1),0.5
"('slowdelta_bandpower_total', 'vol-thalamus')",0.171556,0.114244,-0.08181,0.459417,86,55.1 (12.1),0.5
"('so_rate_f', 'vol-ctx--anterior')",0.12765,0.241498,0.076599,0.488597,86,55.1 (12.1),0.5
"('so_rate_f', 'vol-thalamus')",0.174846,0.107364,-0.096034,0.384855,86,55.1 (12.1),0.5
"('ss_dens_f', 'vol-thalamus')",0.073659,0.500304,0.010652,0.92339,86,55.1 (12.1),0.5
"('ss_dens_f', 'vol-hippocampus')",0.047492,0.664129,0.033065,0.765252,86,55.1 (12.1),0.5
"('fs_dens_c', 'vol-thalamus')",0.191057,0.078046,0.082372,0.456328,86,55.1 (12.1),0.5
"('fs_dens_c', 'vol-hippocampus')",-0.014179,0.896904,-0.044103,0.690375,86,55.1 (12.1),0.5
"('perc_r', 'vol-ctx--isthmuscingulate')",-0.046961,0.66766,-0.019665,0.859075,86,55.1 (12.1),0.5
"('perc_r', 'vol-amygdala')",0.099591,0.361598,0.060737,0.583123,86,55.1 (12.1),0.5
