In [None]:
import pandas as pd
import numpy as np

from abcd_tools.utils.io import load_tabular
from abcd_tools.utils.ConfigLoader import load_yaml
from abcd_tools.image.preprocess import compute_average_betas

In [None]:
# Load the notebook parameters from the YAML file one directory above the
# notebook; later cells read their input/output locations from this mapping.
params = load_yaml("../parameters.yaml")

In [None]:
def load_degrees_of_freedom(r1_fpath: str, r2_fpath: str) -> pd.DataFrame:
    """Read the per-run beta degrees of freedom and place them side by side.

    Args:
        r1_fpath (str): Filepath of the table holding ``tfmri_sstr1_beta_dof``
        r2_fpath (str): Filepath of the table holding ``tfmri_sstr2_beta_dof``

    Returns:
        pd.DataFrame: Run 1 and run 2 DOF columns concatenated along axis 1
    """
    dof_sources = (
        (r1_fpath, 'tfmri_sstr1_beta_dof'),
        (r2_fpath, 'tfmri_sstr2_beta_dof'),
    )
    # One single-column frame per run, in run order.
    per_run = [load_tabular(fpath, cols=[column]) for fpath, column in dof_sources]

    return pd.concat(per_run, axis=1)

# Degrees-of-freedom tables for each release, read from the paths configured
# under the `mri_r1_dof_path_*` / `mri_r2_dof_path_*` parameter keys.
dof_r5 = load_degrees_of_freedom(params['mri_r1_dof_path_r5'], params['mri_r2_dof_path_r5'])
dof_r6 = load_degrees_of_freedom(params['mri_r1_dof_path_r6'], params['mri_r2_dof_path_r6'])

In [None]:
# Locations of the volume-information tables for each release.
vol_info_path_r5 = params['vol_info_path_r5']
vol_info_path_r6 = params['vol_info_path_r6']

In [None]:
def concatenate_hemispheres(lh: pd.DataFrame, rh: pd.DataFrame) -> pd.DataFrame:
    """Concatenate left and right hemisphere dataframes column-wise.

    Every left-hemisphere column label gets an ``_lh`` suffix and every
    right-hemisphere label an ``_rh`` suffix so the two sets stay
    distinguishable after concatenation. The inputs are not modified, so the
    function can be re-run on the same frames without stacking suffixes.

    Args:
        lh (pd.DataFrame): Left hemisphere data
        rh (pd.DataFrame): Right hemisphere data

    Returns:
        pd.DataFrame: Suffixed left columns followed by suffixed right
            columns, aligned on the row index
    """
    # add_suffix returns a relabelled frame instead of renaming the caller's
    # columns in place.
    lh_labelled = lh.add_suffix('_lh')
    rh_labelled = rh.add_suffix('_rh')
    return pd.concat([lh_labelled, rh_labelled], axis=1)


In [None]:
# Per-release directories taken from the parameter file. The visible cells
# build file paths by plain string concatenation (directory + file name).
beta_input_dir_r5 = params['beta_input_dir_r5']
beta_input_dir_r6 = params['beta_input_dir_r6']

# Directories that combine_betas writes the averaged betas into.
beta_output_dir_r5 = params['beta_output_dir_r5']
beta_output_dir_r6 = params['beta_output_dir_r6']

# Directories that filter_avg_betas writes the filtered betas into.
processed_beta_dir_r5 = params['processed_beta_dir_r5']
processed_beta_dir_r6 = params['processed_beta_dir_r6']

In [None]:
# run1 = pd.read_parquet(beta_input_dir_r5 + 'SST_1_correct_go-lh.parquet')
# run2 = pd.read_parquet(beta_input_dir_r5 + 'SST_2_correct_go-lh.parquet')
# run1

In [None]:
# Load the release-6 left-hemisphere beta files for runs 1 and 2 of the 'cg'
# condition and display run 1 for inspection.
run1 = pd.read_parquet(beta_input_dir_r6 + 'sst_cg_beta_r01_lh.parquet')
run2 = pd.read_parquet(beta_input_dir_r6 + 'sst_cg_beta_r02_lh.parquet')
run1

In [None]:
def parse_vol_info(vol_info: pd.DataFrame) -> pd.DataFrame:
    """Extract subject and event identifiers from the first column of ``vol_info``.

    The first column is split on underscores; the third token becomes the
    subject id (prefixed with ``NDAR_``) and the fourth token is translated
    to an event name through a fixed lookup table.

    Args:
        vol_info (pd.DataFrame): Table whose first column holds
            underscore-delimited strings

    Returns:
        pd.DataFrame: Columns ``src_subject_id`` and ``eventname`` on the
            same row index as ``vol_info``; tokens missing from the lookup
            table map to NaN
    """
    event_labels = {
        'baseline': 'baseline_year_1_arm_1',
        '2year': '2_year_follow_up_y_arm_1',
        '4year': '4_year_follow_up_y_arm_1',
        '6year': '6_year_follow_up_y_arm_1',
    }

    tokens = vol_info.iloc[:, 0].str.split("_", expand=True)
    subject_token = tokens[2]
    event_token = tokens[3]

    return pd.DataFrame({
        'src_subject_id': 'NDAR_' + subject_token,
        'eventname': event_token.map(event_labels),
    })
# Read the release-6 volume information and reduce it to the
# src_subject_id / eventname columns produced by parse_vol_info.
vol_info_r6 = pd.read_parquet(vol_info_path_r6)
vol_info_r6 = parse_vol_info(vol_info_r6)
vol_info_r6

In [None]:
def compute_average_betas(run1: pd.DataFrame, run2: pd.DataFrame, 
    vol_info: pd.DataFrame, motion: pd.DataFrame,
    name: str, release='r6') -> pd.DataFrame:
    """Average run 1 and run 2 betas, weighting each run by its degrees of freedom.

    NOTE(review): this notebook-level definition rebinds the
    ``compute_average_betas`` name imported from ``abcd_tools.image.preprocess``
    in the import cell; confirm which implementation is intended.

    Args:
        run1 (pd.DataFrame): Run 1 betas, joined to ``vol_info`` on the row index
        run2 (pd.DataFrame): Run 2 betas, joined to ``vol_info`` on the row index
        vol_info (pd.DataFrame): Must provide ``src_subject_id`` and
            ``eventname`` columns; these become the index of the result
        motion (pd.DataFrame): Exactly two DOF columns (run 1 first, run 2
            second). For ``release='r5'`` its index must contain
            ``eventname``; otherwise it is aligned to the betas on its
            existing index. The caller's frame is left untouched.
        name (str): Replaces the ``tableData`` substring of each output
            column label (as ``name + '_'``)
        release (str): ``'r5'`` restricts betas and DOFs to
            ``baseline_year_1_arm_1``; any other value applies no event filter

    Returns:
        pd.DataFrame: DOF-weighted average betas indexed by
            (``src_subject_id``, ``eventname``). All-missing columns and rows
            are removed, then every row with any remaining missing value.
    """
    idx = ['src_subject_id', 'eventname']
    baseline_event = 'baseline_year_1_arm_1'

    # Copy first: the DOF columns are renamed below, and assigning to
    # `.columns` on the caller's frame would silently rename it as well.
    motion = motion.copy()

    run1 = pd.concat([run1, vol_info], axis=1)
    run2 = pd.concat([run2, vol_info], axis=1)

    if release == 'r5':
        run1 = run1[run1['eventname'] == baseline_event]
        run2 = run2[run2['eventname'] == baseline_event]

        motion = motion.reset_index()
        motion = motion[motion['eventname'] == baseline_event]
        motion = motion.set_index(idx)

    motion.columns = ['run1_dof', 'run2_dof']

    run1 = run1.set_index(idx)
    run2 = run2.set_index(idx)

    # Give both runs the same columns, then put runs and DOFs on a common
    # row index.
    run1_stripped, run2_stripped = run1.align(run2, axis=1)
    run1_stripped, motion = run1_stripped.align(motion, axis=0)
    run2_stripped, motion = run2_stripped.align(motion, axis=0)

    # A beta of exactly 0 is treated as missing. The sum below propagates NaN
    # and the final dropna() removes rows with any missing value, so a row
    # with a zero beta in either run is excluded from the output.
    run1_stripped = run1_stripped.mask(run1_stripped == 0)
    run2_stripped = run2_stripped.mask(run2_stripped == 0)

    # multiply Beta values by degrees of freedom
    run1_weighted = run1_stripped.mul(motion['run1_dof'], axis=0)
    run2_weighted = run2_stripped.mul(motion['run2_dof'], axis=0)

    # divide sum by total degrees of freedom
    num = run1_weighted.add(run2_weighted, axis=0)
    den = motion['run1_dof'] + motion['run2_dof']
    avg = num.div(den, axis=0)

    avg.columns = [c.replace('tableData', name + '_') for c in avg.columns]

    # remove columns and rows that are all missing, then remove rows missing anything
    return avg.dropna(how='all', axis=1).dropna(how='all', axis=0).dropna()

In [None]:
# Try the averaging on the release-6 left-hemisphere runs loaded above,
# labelling the output columns with 'correct_go'.
compute_average_betas(run1, run2, vol_info_r6, dof_r6, 'correct_go', release='r6')

In [None]:
# NOTE(review): these two notebook-level paths are not passed to any call in
# the visible cells (combine_betas receives the *_r5 directories from the
# parameter file, and its own parameters reuse these names) — confirm whether
# they are still needed.
beta_input_dir = "../../data/02_intermediate/"
beta_output_dir = "../../data/02_intermediate/avg_betas/"

def combine_betas(sst_conditions: dict, hemispheres: list, dof: pd.DataFrame, beta_input_dir: str, beta_output_dir: str,
    vol_info_path: str,release: str='r5') -> None:
    """Average both runs for every SST condition and write one file per condition.

    For each condition and hemisphere the run 1 and run 2 beta files are read,
    averaged with ``compute_average_betas``, and the hemispheres are then
    joined with ``concatenate_hemispheres`` and written to
    ``{beta_output_dir}average_betas_{condition}.parquet``.

    Args:
        sst_conditions (dict): Maps the short condition key (used in release-6
            input file names and in the output file name) to the long
            condition name (used in release-5 input file names and as the
            output column prefix)
        hemispheres (list): Hemisphere labels to process; the output step
            reads the ``'lh'`` and ``'rh'`` results, so both must be present
        dof (pd.DataFrame): Degrees of freedom passed to ``compute_average_betas``
        beta_input_dir (str): Directory containing beta data; concatenated
            directly with the file name, so it must end with a path separator
        beta_output_dir (str): Directory to save combined betas; same
            trailing-separator requirement
        vol_info_path (str): Path to volume information (parquet)
        release (str): ``'r5'`` or ``'r6'``; selects the input file naming scheme

    Returns:
        None

    Raises:
        ValueError: If ``release`` is neither ``'r5'`` nor ``'r6'``
    """
    # Fail fast: previously an unknown release only surfaced as an
    # UnboundLocalError on the file path after the volume info had been read.
    if release not in ('r5', 'r6'):
        raise ValueError(f"Unsupported release {release!r}; expected 'r5' or 'r6'")

    vol_info = pd.read_parquet(vol_info_path)

    # compute_average_betas indexes on these two columns. A raw table that
    # lacks them is reduced with parse_vol_info, the same way the notebook
    # does by hand before its direct release-6 call.
    if not {'src_subject_id', 'eventname'}.issubset(vol_info.columns):
        vol_info = parse_vol_info(vol_info)

    for condition, name in sst_conditions.items():
        betas = {}
        for hemi in hemispheres:

            if release == 'r5':
                run1_fpath = f"{beta_input_dir}SST_1_{name}-{hemi}.parquet"
                run2_fpath = f"{beta_input_dir}SST_2_{name}-{hemi}.parquet"
            else:  # release == 'r6', guaranteed by the check above
                run1_fpath = f"{beta_input_dir}sst_{condition}_beta_r01_{hemi}.parquet"
                run2_fpath = f"{beta_input_dir}sst_{condition}_beta_r02_{hemi}.parquet"

            run1 = pd.read_parquet(run1_fpath)
            run2 = pd.read_parquet(run2_fpath)

            betas[hemi] = compute_average_betas(run1, run2, vol_info, dof, name=name, release=release)

        betas_df = concatenate_hemispheres(betas['lh'], betas['rh'])

        betas_df.to_parquet(f"{beta_output_dir}average_betas_{condition}.parquet")


In [None]:
# Short condition key -> long condition name, as consumed by combine_betas.
sst_conditions = {
    'cs': 'correct_stop',
    'cg': 'correct_go',
    'is': 'incorrect_stop',
    'ig': 'incorrect_go'
}
hemispheres = ['lh', 'rh']

# Only the release-5 inputs are processed here.
combine_betas(sst_conditions, hemispheres, dof_r5, beta_input_dir_r5, beta_output_dir_r5, vol_info_path_r5, release='r5')

In [None]:
# Inspect the averaged betas written for the 'cs' condition.
pd.read_parquet(beta_output_dir_r5 + 'average_betas_cs.parquet')

In [None]:
def load_mri_qc(mri_qc_path: str) -> pd.DataFrame:
    """Load the SST imaging inclusion flag and keep only rows where it equals 1.

    Args:
        mri_qc_path (str): Path to the table holding ``imgincl_sst_include``

    Returns:
        pd.DataFrame: Rows of the loaded table whose inclusion flag is 1
    """
    qc_table = load_tabular(mri_qc_path, cols=['imgincl_sst_include'])
    is_included = qc_table['imgincl_sst_include'] == 1
    return qc_table[is_included]
    
# QC table restricted to rows flagged for inclusion; its index is what
# filter_avg_betas matches the betas against.
mri_qc_path = "../../data/01_raw/tabular/core/imaging/mri_y_qc_incl.csv"
mri_qc = load_mri_qc(mri_qc_path)
mri_qc


In [None]:
# Display the filtered behavioural table that filter_avg_betas uses as its
# set of available targets.
load_tabular(params['filtered_behavioral_path'])

In [None]:
def filter_avg_betas(mri_qc_df: pd.DataFrame, 
                    sst_conditions: list,
                    filtered_behavioral_path: str,
                    beta_output_dir: str,
                    processed_beta_dir: str) -> None:
    """Restrict the averaged betas to rows with targets that also pass MRI QC.

    For every condition the averaged betas are read from
    ``{beta_output_dir}average_betas_{condition}.parquet``, limited to index
    entries present in the behavioural table, then to index entries present
    in ``mri_qc_df``, and written to
    ``{processed_beta_dir}processed_betas_{condition}.parquet``. The row
    counts at each step are printed.

    Args:
        mri_qc_df (pd.DataFrame): MRI QC data; only its index is used
        sst_conditions (list): SST condition keys; any iterable of keys works
            (the notebook passes ``dict.keys()``)
        filtered_behavioral_path (str): Path to the behavioural targets table;
            only its index and row count are used
        beta_output_dir (str): Directory holding the averaged betas;
            concatenated directly with the file name
        processed_beta_dir (str): Directory receiving the filtered betas;
            concatenated directly with the file name

    Returns:
        None
    """

    # load targets
    filtered_behavioral = load_tabular(filtered_behavioral_path)
    n_targets = filtered_behavioral.shape[0]

    for condition in sst_conditions:
        avg_betas = pd.read_parquet(f"{beta_output_dir}average_betas_{condition}.parquet")

        # limit to available targets
        with_targets = avg_betas[avg_betas.index.isin(filtered_behavioral.index)]

        print(f"{with_targets.shape[0]} of {n_targets} subjects had MRI data for {condition}")

        # keep only rows whose index also appears in the QC-passing table
        passed_qc = with_targets[with_targets.index.isin(mri_qc_df.index)]
        n_failed = with_targets.shape[0] - passed_qc.shape[0]

        print(f"{n_failed} failed MRI QC for {condition}")

        passed_qc.to_parquet(f"{processed_beta_dir}processed_betas_{condition}.parquet")


# Filter the release-5 averaged betas for every condition key.
filter_avg_betas(mri_qc, sst_conditions.keys(),
    params['filtered_behavioral_path'], beta_output_dir_r5, processed_beta_dir_r5)

In [None]:
# Averaged 'cs' betas before filtering, shown for comparison with the
# processed file displayed in the next cell.
pd.read_parquet(beta_output_dir_r5 + 'average_betas_cs.parquet')    

In [None]:
# 'cs' betas after the target and MRI QC filtering.
pd.read_parquet(f"{processed_beta_dir_r5}processed_betas_cs.parquet")

In [None]:
# Source tables for the MRI confounds read by load_mri_confounds.
motion_path = "../../data/01_raw/abcd-sync/6.0/tabulated/img/mriqcrp203.csv"
scanner_path = "../../data/01_raw/abcd-sync/6.0/tabulated/img/abcd_mri01.csv"

def load_mri_confounds(motion_path: str, scanner_path: str, timepoints: list) -> pd.DataFrame:
    """Load the SST mean-motion and scanner serial-number columns side by side.

    Args:
        motion_path (str): Path to the table holding ``iqc_sst_all_mean_motion``
        scanner_path (str): Path to the table holding ``mri_info_deviceserialnumber``
        timepoints (list): Forwarded unchanged to ``load_tabular`` for both tables

    Returns:
        pd.DataFrame: Motion column followed by scanner column, concatenated
            along axis 1
    """
    confound_sources = (
        (motion_path, 'iqc_sst_all_mean_motion'),
        (scanner_path, 'mri_info_deviceserialnumber'),
    )
    frames = [
        load_tabular(fpath, cols=[column], timepoints=timepoints)
        for fpath, column in confound_sources
    ]

    return pd.concat(frames, axis=1)
    
# FIXME(review): in the visible cells `motion` and `scanner` are only assigned
# inside load_mri_confounds, so this line depends on kernel state from
# elsewhere and will likely raise NameError on a fresh run. It presumably
# should call load_mri_confounds(motion_path, scanner_path, timepoints=...) —
# confirm the intended timepoints.
mri_confounds = pd.concat([motion, scanner], axis=1)


In [None]:
# NOTE(review): `motion` is not assigned at notebook scope in the visible
# cells; this display relies on a variable defined elsewhere — confirm.
motion