In [None]:
import pandas as pd

from abcd_tools.utils.io import load_tabular, apply_nda_names
from abcd_tools.utils.ConfigLoader import load_yaml


In [None]:
# Read the notebook's parameters from ../parameters.yaml via load_yaml; later
# cells look values up by key (e.g. params['processed_beta_dir_r6']).
params = load_yaml("../parameters.yaml")

In [None]:
# Location of the processed betas, taken from the parameters.
# NOTE(review): the file name is appended by plain string concatenation, so
# the configured value presumably ends with a path separator — confirm.
betas = params['processed_beta_dir_r6']
cg = pd.read_parquet(betas + 'processed_betas_cs.parquet')
cg  # display the loaded frame

In [None]:
def strip_names(df: pd.DataFrame, names: list = None,
                idx: list = ['src_subject_id', 'eventname', 'condition'],
                set_index=True,
                reset_index=True) -> pd.DataFrame:
    """Remove substrings from column names.

    Each column is renamed in place of itself, so column order and the
    column-to-data mapping are preserved even when only some columns contain
    one of the substrings or when several substrings are given.

    Note:
        When ``set_index`` is False the new labels are assigned to
        ``df.columns`` on the frame that was passed in, so the caller's
        dataframe is renamed too.

    Args:
        df (pd.DataFrame): Dataframe whose columns will be modified.
        names (list): List of strings to remove from column names. A single
            string is treated as a one-element list. Defaults to None, which
            removes nothing.
        idx (list): Index columns. Defaults to
            ['src_subject_id', 'eventname', 'condition'].
        set_index (bool): If True, move the ``idx`` columns into the index
            before renaming so they are not renamed. Defaults to True.
        reset_index (bool): If True, call ``reset_index()`` on the result
            before returning it. Defaults to True.

    Returns:
        pd.DataFrame: Dataframe with modified column names.
    """
    if names is None:
        names = []
    elif isinstance(names, str):
        # Iterating a bare string would strip single characters.
        names = [names]

    if set_index:
        # `idx` is only read here, never mutated, so the list default is safe.
        df = df.set_index(idx)

    def _strip(label):
        # Non-string labels cannot contain a substring; leave them untouched.
        if not isinstance(label, str):
            return label
        for name in names:
            label = label.replace(name, '')
        return label

    original = list(df.columns)
    stripped = [_strip(label) for label in original]

    # Only reassign when something changed, so an unaffected frame is left
    # exactly as it was.
    if stripped != original:
        df.columns = stripped

    if reset_index:
        df = df.reset_index()
    return df

def drop_cols(df: pd.DataFrame, drop_strings: list) -> pd.DataFrame:
    """Select the columns whose names contain none of the given substrings.

    Args:
        df (pd.DataFrame): Dataframe to filter.
        drop_strings (list): Substrings; a column is left out when its name
            contains at least one of them.

    Returns:
        pd.DataFrame: ``df`` restricted to the remaining columns, in their
            original order.
    """
    kept = []
    for col in df.columns:
        # Check the column name against every substring.
        hits = [fragment in col for fragment in drop_strings]
        if any(hits):
            continue
        kept.append(col)
    return df[kept]

def recover_conditions(df1, df2, names1, names2, newname):
    """Add two contrast tables column by column and relabel the result.

    Both tables have their contrast-specific substrings stripped from the
    column names (via ``strip_names`` with ``set_index=False``) and are then
    indexed by ['src_subject_id', 'eventname']. Only columns present in both
    tables are kept (inner alignment on the column axis) before the frames are
    added. Columns whose names contain 'run1' or 'run2' are dropped, and every
    occurrence of 'all' in the remaining names becomes 'all_<newname>'.

    NOTE(review): presumably used to turn "<condition> vs X" plus
    "X vs fixation" into a "<condition> vs fixation" estimate — confirm.

    Args:
        df1 (pd.DataFrame): First contrast table.
        df2 (pd.DataFrame): Second contrast table.
        names1 (list): Substrings to strip from the columns of ``df1``.
        names2 (list): Substrings to strip from the columns of ``df2``.
        newname (str): Label appended after 'all_' in the output column names.

    Returns:
        pd.DataFrame: Sum of the two tables, indexed by subject and event.
    """
    keys = ['src_subject_id', 'eventname']

    left = strip_names(df1, names1, set_index=False)
    left = left.set_index(keys)
    right = strip_names(df2, names2, set_index=False)
    right = right.set_index(keys)

    # Restrict both frames to their shared columns before adding them.
    left, right = left.align(right, join='inner', axis=1)
    summed = drop_cols(left + right, ['run1', 'run2'])

    replacement = 'all_' + newname
    summed.columns = [label.replace('all', replacement) for label in summed.columns]

    return summed

In [None]:
 def load_contrasts(fpath, dict_path="../../data/01_raw/tabular/abcd_5-1_dictionary.csv"):
    """Load a contrast table and pass it through ``apply_nda_names``.

    Args:
        fpath: Path handed to ``load_tabular``.
        dict_path: Dictionary file handed to ``apply_nda_names``. Defaults to
            the ABCD 5.1 dictionary CSV under ``data/01_raw/tabular``.

    Returns:
        Whatever ``apply_nda_names`` returns for the loaded table
        (presumably the same table with renamed columns — confirm).
    """
    return apply_nda_names(load_tabular(fpath), dict_path)

In [None]:
# Substrings stripped from the column names of each SST contrast table
# (passed to recover_conditions as the `names` arguments).
sst_csvcg_names = ['correct.stop.vs.correct.go_']
sst_cgvfx_names = ['correct.go.vs.fixation_']
sst_isvcg_names = ['incorrect.stop.vs.correct.go_']
sst_igvcg_names = ['incorrect.go.vs.correct.go_']

In [None]:
base_path = "../../data/01_raw/tabular/core/imaging/"

# Load the four SST contrast tables, in the same order as before:
# csvcg, isvcg, igvcg, cgvfx.
csvcg_dst, isvcg_dst, igvcg_dst, cgvfx_dst = (
    load_contrasts(base_path + fname)
    for fname in (
        'mri_y_tfmr_sst_csvcg_dst.csv',
        'mri_y_tfmr_sst_isvcg_dst.csv',
        'mri_y_tfmr_sst_igvcg_dst.csv',
        'mri_y_tfmr_sst_cgvfx_dst.csv',
    )
)


In [None]:
# Add each "<condition> vs correct go" table to the "correct go vs fixation"
# table. NOTE(review): presumably this yields "<condition> vs fixation"
# estimates — confirm.
# Keep these calls in this order and before cgvfx_dst is relabelled:
# recover_conditions hands its inputs to strip_names with set_index=False,
# and strip_names assigns the stripped labels to `df.columns` on the frame it
# receives, so cgvfx_dst itself can be renamed by the first call.
correct_stop = recover_conditions(csvcg_dst, cgvfx_dst, sst_csvcg_names, sst_cgvfx_names, 'correct_stop')
incorrect_stop = recover_conditions(isvcg_dst, cgvfx_dst, sst_isvcg_names, sst_cgvfx_names, 'incorrect_stop')
incorrect_go = recover_conditions(igvcg_dst, cgvfx_dst, sst_igvcg_names, sst_cgvfx_names, 'incorrect_go')

In [None]:
# Build the correct-go table from the correct-go-vs-fixation contrast: drop
# columns containing 'run1'/'run2' and turn 'all' into 'all_correct_go'.
# NOTE(review): earlier recover_conditions calls may already have stripped
# this frame's contrast prefix in place (strip_names assigns to df.columns) —
# confirm the resulting column names are the intended ones.
cgvfx_dst = drop_cols(cgvfx_dst, ['run1', 'run2'])
cgvfx_dst.columns = [c.replace('all', 'all_' + 'correct_go') for c in cgvfx_dst.columns]
cgvfx_dst  # display the relabelled frame

In [None]:
# Place the four condition tables side by side (axis=1).
# NOTE(review): the three recovered frames are indexed by
# ['src_subject_id', 'eventname'] (set inside recover_conditions), while
# cgvfx_dst keeps whatever index load_contrasts returned — confirm they match.
df = pd.concat([correct_stop, incorrect_stop, incorrect_go, cgvfx_dst], axis=1)
df  # display the combined frame

In [None]:
def make_roi_dataset(params: dict) -> pd.DataFrame:
    """Assemble the per-condition SST ROI table from four contrast files.

    Loads the csvcg, isvcg, igvcg and cgvfx contrast tables from the
    module-level ``base_path``, combines each of the first three with the
    cgvfx table through ``recover_conditions``, relabels the cgvfx table as
    the correct-go condition, and concatenates everything column-wise.

    Args:
        params (dict): Dictionary with parameters. Must provide the keys
            'sst_csvcg_names', 'sst_cgvfx_names', 'sst_isvcg_names' and
            'sst_igvcg_names' (substrings to strip from column names).

    Returns:
        pd.DataFrame: ROI dataset.
    """
    stop_ok = load_contrasts(base_path + 'mri_y_tfmr_sst_csvcg_dst.csv')
    stop_err = load_contrasts(base_path + 'mri_y_tfmr_sst_isvcg_dst.csv')
    go_err = load_contrasts(base_path + 'mri_y_tfmr_sst_igvcg_dst.csv')
    go_ok = load_contrasts(base_path + 'mri_y_tfmr_sst_cgvfx_dst.csv')

    stop_ok_names = params['sst_csvcg_names']
    go_ok_names = params['sst_cgvfx_names']
    stop_err_names = params['sst_isvcg_names']
    go_err_names = params['sst_igvcg_names']

    # Order matters: strip_names (called inside recover_conditions) can rename
    # the shared go_ok frame in place, so keep the original call sequence.
    jobs = (
        (stop_ok, stop_ok_names, 'correct_stop'),
        (stop_err, stop_err_names, 'incorrect_stop'),
        (go_err, go_err_names, 'incorrect_go'),
    )
    pieces = [
        recover_conditions(frame, go_ok, names, go_ok_names, label)
        for frame, names, label in jobs
    ]

    go_ok = drop_cols(go_ok, ['run1', 'run2'])
    go_ok.columns = [c.replace('all', 'all_' + 'correct_go') for c in go_ok.columns]
    pieces.append(go_ok)

    return pd.concat(pieces, axis=1)

# Build the ROI table through the function; this rebinds `df`.
df = make_roi_dataset(params)

In [None]:
df  # display the frame returned by make_roi_dataset

In [None]:
# Load the MRI QC table and keep only the rows selected by 'imagincl'.
# NOTE(review): indexing with the column as a mask assumes 'imagincl' is
# boolean — confirm its dtype.
mri_qc = load_tabular(params['mri_qc_path'])
mri_qc = mri_qc[mri_qc['imagincl']]

In [None]:
def filter_rois(roi_betas: pd.DataFrame, params: dict) -> pd.DataFrame:
    """Filter ROI dataset to rows passing behavioral and MRI QC filters.

    A row is kept when its index value appears in the filtered behavioral
    table and in the MRI QC table after that table has been restricted to
    rows where 'imagincl' is set.

    NOTE(review): membership is tested with ``Index.isin``, so the three
    tables are assumed to share the same index structure — confirm against
    what ``load_tabular`` returns.

    Args:
        roi_betas (pd.DataFrame): ROI dataset.
        params (dict): Dictionary with parameters. Must provide the keys
            'filtered_behavioral_path' and 'mri_qc_path'.

    Returns:
        pd.DataFrame: Filtered ROI dataset.
    """
    filtered_behavioral = load_tabular(params['filtered_behavioral_path'])
    mri_qc = load_tabular(params['mri_qc_path'])

    # Apply the same inclusion mask this notebook uses on the QC table;
    # without it, rows excluded by QC would still count as passing merely
    # because they are listed in the table.
    mri_qc = mri_qc[mri_qc['imagincl']]

    in_behavioral = roi_betas.index.isin(filtered_behavioral.index)
    in_qc = roi_betas.index.isin(mri_qc.index)

    return roi_betas[in_behavioral & in_qc]

# Apply the filters to the assembled ROI table; the result is only displayed,
# not assigned to a name.
filter_rois(df, params)