In [15]:
from os.path import basename, exists, join, splitext
from os import makedirs
import json
import pandas as pd
from natsort import natsorted
import os
from natsort import natsorted

In [16]:
## Copied from Sam Nastase's extract-confounds script ##

Function for extracting CompCor components (aCompCor or tCompCor)

In [17]:
def extract_compcor(confounds_df, confounds_meta,
                    n_comps=5, method='tCompCor',
                    tissue=None):
    """Select CompCor component time series from a confounds table.

    Parameters
    ----------
    confounds_df : pandas.DataFrame
        Confound time series; must contain one column per selected
        component name.
    confounds_meta : dict
        Sidecar metadata keyed by column name. For each candidate entry
        the keys 'Method', 'Retained', 'Mask' (aCompCor only),
        'SingularValue' and 'CumulativeVarianceExplained' are read.
        Entries that lack 'Method', 'Retained' or 'Mask' are skipped
        rather than raising KeyError.
    n_comps : int or float, default 5
        If >= 1, the number of leading components to keep (truncated to
        int). If between 0 and 1, components are added in order until
        'CumulativeVarianceExplained' exceeds this value.
    method : {'tCompCor', 'aCompCor'}, default 'tCompCor'
        Which CompCor variant to select.
    tissue : {'combined', 'CSF', 'WM'} or None
        Mask to select for aCompCor (required). Ignored, with a printed
        warning, for tCompCor.

    Returns
    -------
    pandas.DataFrame
        The selected component columns of ``confounds_df``.

    Raises
    ------
    AssertionError
        On invalid arguments, if the naturally sorted components are not
        in strictly decreasing order of 'SingularValue', or if no
        component matches the request.
    """

    # Check that we got a sensible number of components
    assert n_comps > 0, "n_comps must be greater than 0"

    # Check that method is specified correctly
    assert method in ['aCompCor', 'tCompCor'], (
        "method must be 'aCompCor' or 'tCompCor'")

    # Check that tissue is specified for aCompCor
    if method == 'aCompCor' and tissue not in ['combined', 'CSF', 'WM']:
        raise AssertionError("Must specify a tissue type "
                             "(combined, CSF, or WM) for aCompCor")

    # Ignore tissue if specified for tCompCor
    if method == 'tCompCor' and tissue:
        print("Warning: tCompCor is not restricted to a tissue "
              f"mask - ignoring tissue specification ({tissue})")
        tissue = None

    # Keep only retained components of the requested method; .get() lets
    # unrelated sidecar entries (no 'Method' / 'Retained') pass through
    compcor_meta = {name: meta
                    for name, meta in confounds_meta.items()
                    if isinstance(meta, dict)
                    and meta.get('Method') == method
                    and meta.get('Retained')}

    # If aCompCor, filter metadata for tissue mask
    if method == 'aCompCor':
        compcor_meta = {name: meta
                        for name, meta in compcor_meta.items()
                        if meta.get('Mask') == tissue}

    # Natural name order must coincide with decreasing singular value,
    # otherwise "first n by name" would not mean "top n components"
    comp_sorted = natsorted(compcor_meta)
    for comp, comp_next in zip(comp_sorted, comp_sorted[1:]):
        assert (compcor_meta[comp]['SingularValue'] >
                compcor_meta[comp_next]['SingularValue']), (
            f"{method} components are not sorted by singular value "
            f"({comp} vs {comp_next})")

    # Either get top n components
    if n_comps >= 1.0:
        n_comps = int(n_comps)
        if len(comp_sorted) >= n_comps:
            comp_selector = comp_sorted[:n_comps]
        else:
            comp_selector = comp_sorted
            print(f"Warning: Only {len(comp_sorted)} {method} "
                  f"components available ({n_comps} requested)")

    # Or components necessary to capture n proportion of variance
    else:
        comp_selector = []
        for comp in comp_sorted:
            comp_selector.append(comp)
            if (compcor_meta[comp]['CumulativeVarianceExplained']
                    > n_comps):
                break

    # Check we didn't end up with degenerate 0 components
    assert len(comp_selector) > 0, (
        f"No {method} components matched the request")

    # Grab the actual component time series
    confounds_compcor = confounds_df[comp_selector]
    return confounds_compcor

Function for extracting group of (variable number) confounds

In [18]:
def extract_group(confounds_df, groups):
    """Collect every column whose name contains one of the group labels.

    Parameters
    ----------
    confounds_df : pandas.DataFrame
        Confounds table to select columns from.
    groups : str or iterable of str
        Label(s) to look for. Matching is by substring (``label in
        column_name``), so 'cosine' selects 'cosine00', 'cosine01', ...

    Returns
    -------
    pandas.DataFrame
        Matching columns, concatenated group by group in the order the
        labels are iterated. With no labels, a frame with the original
        index and no columns is returned.
    """

    # Expect a collection of labels, so wrap a lone string
    if isinstance(groups, str):
        groups = [groups]

    # Filter for all columns with label
    confounds_group = [
        confounds_df[[col for col in confounds_df.columns if group in col]]
        for group in groups]

    # pd.concat raises on an empty list; return an empty selection instead
    if not confounds_group:
        return confounds_df[[]]

    return pd.concat(confounds_group, axis=1)

Function for loading in confounds files

In [19]:
def load_confounds(confounds_fn):
    """Load a confounds TSV file and its JSON sidecar.

    Parameters
    ----------
    confounds_fn : str
        Path to the confounds TSV file. A path without the '.tsv'
        extension is also accepted: if the path as given does not exist
        but ``confounds_fn + '.tsv'`` does, that file is read.

    Returns
    -------
    confounds_df : pandas.DataFrame
        Contents of the tab-separated confounds file.
    confounds_meta : dict
        Parsed JSON sidecar, read from the TSV path with its extension
        replaced by '.json'.
    """
    # NOTE(review): a second `load_confounds` is defined later in this
    # notebook and replaces this one when the cells are run in order.

    # Accept an extensionless stem as well as the full TSV path
    tsv_fn = confounds_fn
    if not exists(tsv_fn) and exists(f'{confounds_fn}.tsv'):
        tsv_fn = f'{confounds_fn}.tsv'

    # Load the confounds TSV files
    confounds_df = pd.read_csv(tsv_fn, sep='\t')

    # Load the JSON sidecar metadata
    with open(splitext(tsv_fn)[0] + '.json') as f:
        confounds_meta = json.load(f)
    return confounds_df, confounds_meta

In [20]:
def load_confounds(confounds_fn):
    """Load a confounds TSV file and its JSON sidecar from a common stem.

    Parameters
    ----------
    confounds_fn : str
        Path of the confounds file without extension; '.tsv' and '.json'
        are appended to locate the table and its sidecar. A path that
        already ends in '.tsv' is also accepted and has that extension
        stripped first (unless ``confounds_fn + '.tsv'`` really exists).

    Returns
    -------
    confounds_df : pandas.DataFrame
        Contents of the tab-separated confounds file.
    confounds_meta : dict
        Parsed JSON sidecar.
    """
    # Normalise to the extensionless stem so both call styles work
    stem = confounds_fn
    if stem.endswith('.tsv') and not exists(stem + '.tsv'):
        stem = stem[:-len('.tsv')]

    # Load the confounds TSV files
    confounds_df = pd.read_csv(stem + '.tsv', sep='\t')

    # Load the JSON sidecar metadata
    with open(stem + '.json') as f:
        confounds_meta = json.load(f)
    return confounds_df, confounds_meta

Function for extracting confounds (including CompCor)

In [21]:
def extract_confounds(confounds_df, confounds_meta, model_spec):
    """Assemble the confound regressors described by a model specification.

    Parameters
    ----------
    confounds_df : pandas.DataFrame
        Full confounds table.
    confounds_meta : dict
        Sidecar metadata, passed through to ``extract_compcor``.
    model_spec : dict
        Must contain 'confounds', a list of column names. The labels
        'cosine' and 'motion_outlier' in that list stand for a variable
        number of columns and are expanded with ``extract_group``.
        Optional keys 'aCompCor' / 'tCompCor' hold either one dict of
        keyword arguments for ``extract_compcor`` or a list of such
        dicts. ``model_spec`` is not modified.

    Returns
    -------
    pandas.DataFrame
        Named confounds first, then expanded groups, then CompCor
        components. Groups follow their order in
        ``model_spec['confounds']`` and CompCor methods follow their
        order in ``model_spec``, so the column order is reproducible.
    """
    variable_groups = ('cosine', 'motion_outlier')
    requested = model_spec['confounds']

    # Pop out confound groups of variable number (ordered, de-duplicated)
    groups = list(dict.fromkeys(
        c for c in requested if c in variable_groups))

    # Grab the requested confounds
    pieces = [confounds_df[[c for c in requested if c not in groups]]]

    # Grab confound groups if present
    if groups:
        pieces.append(extract_group(confounds_df, groups))

    # Get aCompCor / tCompCor confounds if requested
    for compcor in model_spec:
        if compcor not in ('aCompCor', 'tCompCor'):
            continue
        # Normalise into a local so the caller's spec is left untouched
        compcor_specs = model_spec[compcor]
        if isinstance(compcor_specs, dict):
            compcor_specs = [compcor_specs]
        for compcor_kws in compcor_specs:
            pieces.append(extract_compcor(
                confounds_df,
                confounds_meta,
                method=compcor,
                **compcor_kws))

    if len(pieces) == 1:
        return pieces[0]
    # Join everything in one pass instead of re-concatenating per piece
    return pd.concat(pieces, axis=1)

## directories

In [22]:
# Project root; every other location is derived from it
top_dir = '/jukebox/graziano/coolCatIsaac/MEI'

# Data tree under the project root
data_dir = f'{top_dir}/data'
behav_dir = f'{data_dir}/behavioral'
rois_dir = f'{data_dir}/rois'

# Input (fMRIPrep derivatives) and output (working) directories
fmri_prep = f'{data_dir}/bids/derivatives/fmriprep'
work_dir = f'{data_dir}/work'


In [23]:
fmri_prep

'/jukebox/graziano/coolCatIsaac/MEI/data/bids/derivatives/fmriprep'

### sublist

In [24]:
## Excluded subjects: 01, 11, 15, and 32

In [35]:
# Cohort for the standard pipeline: sub-002 .. sub-040 without the
# subjects that are left out of it (011, 015, 032).
skipped_subs = {11, 15, 32}
cohort_subs = [f'sub-{num:03d}' for num in range(2, 41)
               if num not in skipped_subs]

# Subjects actually processed by the run loop. Currently restricted to a
# single subject; use `sub_list = cohort_subs` to process the whole cohort.
sub_list = ['sub-015']

# Run numbers 1..tot_runs are processed for each subject
tot_runs = 5

In [36]:
"""
Notes:
- Do not deal with subject confounds here
- need to write code that will grab the confounds for the SEVENTH run
sub-012: NO CONFOUNDS FOR RUN 5 -- OUTPUT RUN 6 AND RUN 7 BUT NO RUN 5
"""

'\nNotes:\n- Do not deal with subject confounds here\n- need to write code that will grab the confounds for the SEVENTH run\nsub-012: NO CONFOUNDS FOR RUN 5 -- OUTPUT RUN 6 AND RUN 7 BUT NO RUN 5\n'

## run

In [37]:
# Build and save the confound-model CSV for every subject / run

# Settings that do not change between iterations
out_name = 'confs'
out_dir = join(work_dir, out_name)

model =  {'confounds':
          ['trans_x', 'trans_y', 'trans_z',
           'rot_x', 'rot_y', 'rot_z', 'cosine'],
          'aCompCor': [{'n_comps': 5, 'tissue': 'CSF'},
                       {'n_comps': 5, 'tissue': 'WM'}]}

# Make directory if it doesn't exist
makedirs(out_dir, exist_ok=True)

for sub in sub_list:
    for run in range(1, tot_runs + 1):
        # sub-012 has no confounds for run 5, so from the fifth slot on
        # the next run number is read instead.
        # NOTE(review): with tot_runs = 5 this covers runs 1-4 and 6 only;
        # run 7 is never reached - confirm whether that is intended.
        run_id = run + 1 if sub == 'sub-012' and run >= 5 else run

        # Extensionless path of the confounds file (TSV + JSON sidecar)
        file1 = (f'{sub}/ses-01/func/{sub}_ses-01_task-None_'
                 f'run-{run_id:02d}_desc-confounds_timeseries')
        confounds_fn = os.path.join(fmri_prep, file1)

        confounds_df, confounds_meta = load_confounds(confounds_fn)

        # Extract confounds based on model
        confounds = extract_confounds(confounds_df,
                                      confounds_meta,
                                      model)

        # Create CSVs with headers for convenience
        ort_csv = splitext(basename(confounds_fn).replace(
            'desc-confounds',
            'desc-model'))[0] + '.csv'
        ort_fn = join(out_dir, ort_csv)
        #### save confound file !
        confounds.to_csv(ort_fn, sep=',', index=False)

        print(f"Assembled confound models for {sub}")

Assembled confound models for sub-015
Assembled confound models for sub-015
Assembled confound models for sub-015
Assembled confound models for sub-015
Assembled confound models for sub-015


In [21]:
run

1

In [27]:
f'{run:02d}'

'01'

### for anomaly subs 

In [14]:
# seven runs for these subjects: only the extra run is handled here
anomaly_runs = {'sub-029': range(7, 8)}

# Output location and confound model, shared by every iteration
out_name = 'confs'
out_dir = join(work_dir, out_name)
model = dict(
    confounds=['trans_x', 'trans_y', 'trans_z',
               'rot_x', 'rot_y', 'rot_z', 'cosine'],
    aCompCor=[dict(n_comps=5, tissue='CSF'),
              dict(n_comps=5, tissue='WM')],
)

# Make directory if it doesn't exist
if not exists(out_dir):
    makedirs(out_dir)

for sub, runs in anomaly_runs.items():
    for run in runs:
        # Extensionless path of this run's confounds file
        func_rel = f'{sub}/ses-01/func'
        stem = f'{sub}_ses-01_task-None_run-{run:02d}_desc-confounds_timeseries'
        file1 = f'{func_rel}/{stem}'
        confounds_fn = os.path.join(fmri_prep, file1)

        # Load the table + sidecar and pull out the modelled confounds
        confounds_df, confounds_meta = load_confounds(confounds_fn)
        confounds = extract_confounds(confounds_df, confounds_meta, model)

        # Name of the CSV (with headers) this run would be written to
        model_name = basename(confounds_fn).replace('desc-confounds',
                                                    'desc-model')
        ort_csv = splitext(model_name)[0] + '.csv'
        ort_fn = join(out_dir, ort_csv)
        # NOTE(review): saving is switched off in this cell, so nothing is
        # written for this run - confirm that is intended.
        #confounds.to_csv(ort_fn, sep=',', index=False)

        print(f"Assembled confound models for {sub}")

Assembled confound models for sub-029


## sub 032

In [None]:
"""
SUB-032:
- grab the first run from session 1
- for run 2 and later, grab the second session
- subtract one from the run number to match the session-2 files, which are numbered 1-5
- save those runs as run +1, so session two becomes runs 2-6 instead of 1-5
"""

In [64]:
### SUBJECT 032 -- run 1 comes from ses-01, runs 2-6 come from ses-02 ###
### (ses-02 run k is saved as run k+1) ###

# Output location and confound model, shared by every iteration
out_name = 'confs'
out_dir = join(work_dir, out_name)
model = dict(
    confounds=['trans_x', 'trans_y', 'trans_z',
               'rot_x', 'rot_y', 'rot_z', 'cosine'],
    aCompCor=[dict(n_comps=5, tissue='CSF'),
              dict(n_comps=5, tissue='WM')],
)

# Make directory if it doesn't exist
if not exists(out_dir):
    makedirs(out_dir)

for sub in ['sub-032']:
    for run in range(1, 7):
        # Work out which session / run number holds the data for this slot
        if run >= 2:
            run_adjust = run - 1
            session, source_run = 'ses-02', run_adjust
        else:
            session, source_run = 'ses-01', run

        # Extensionless path of the source confounds file
        file1 = (f'{sub}/{session}/func/{sub}_{session}_task-None_'
                 f'run-{source_run:02d}_desc-confounds_timeseries')
        confounds_fn = os.path.join(fmri_prep, file1)

        # Load the table + sidecar and pull out the modelled confounds
        confounds_df, confounds_meta = load_confounds(confounds_fn)
        confounds = extract_confounds(confounds_df, confounds_meta, model)

        # Output name carries the slot number `run`, not the source run
        # (for run 1 both numbers coincide, so only the label changes)
        model_name = basename(confounds_fn).replace(
            f'{source_run:02d}_desc-confounds',
            f'{run:02d}_desc-model')
        ort_csv = splitext(model_name)[0] + '.csv'
        ort_fn = join(out_dir, ort_csv)
        #### save confound file !
        confounds.to_csv(ort_fn, sep=',', index=False)

        print(f"Assembled confound models for {sub}")

Assembled confound models for sub-032
Assembled confound models for sub-032
Assembled confound models for sub-032
Assembled confound models for sub-032
Assembled confound models for sub-032
Assembled confound models for sub-032


## sub-015

In [None]:
# Inclusive upper bound of the run numbers iterated as range(1, tot_runs+1)
tot_runs = 5

In [None]:
# check runs for sub-015 (runs 1..tot_runs); nothing is written to disk

# Output location and confound model, shared by every iteration
out_name = 'confs'
out_dir = join(work_dir, out_name)
model = dict(
    confounds=['trans_x', 'trans_y', 'trans_z',
               'rot_x', 'rot_y', 'rot_z', 'cosine'],
    aCompCor=[dict(n_comps=5, tissue='CSF'),
              dict(n_comps=5, tissue='WM')],
)

for sub in ['sub-015']:
    run = 1
    while run <= tot_runs:
        # Extensionless path of this run's confounds file
        stem = f'{sub}_ses-01_task-None_run-{run:02d}_desc-confounds_timeseries'
        file1 = f'{sub}/ses-01/func/{stem}'
        confounds_fn = os.path.join(fmri_prep, file1)

        # Make directory if it doesn't exist
        if not exists(out_dir):
            makedirs(out_dir)

        # Load the table + sidecar and pull out the modelled confounds
        confounds_df, confounds_meta = load_confounds(confounds_fn)
        confounds = extract_confounds(confounds_df, confounds_meta, model)

        # Name of the CSV (with headers) this run would be written to
        model_name = basename(confounds_fn).replace('desc-confounds',
                                                    'desc-model')
        ort_csv = splitext(model_name)[0] + '.csv'
        ort_fn = join(out_dir, ort_csv)
        # NOTE(review): saving is switched off in this cell - confirm that
        # is intended.
        #confounds.to_csv(ort_fn, sep=',', index=False)

        print(f"Assembled confound models for {sub}")
        run += 1

In [63]:
ort_csv

'sub-032_ses-02_task-None_run-06_desc-model_timeseries.csv'

In [47]:
confounds.columns

Index(['trans_x', 'trans_y', 'trans_z', 'rot_x', 'rot_y', 'rot_z', 'cosine00',
       'cosine01', 'cosine02', 'cosine03', 'cosine04', 'cosine05', 'cosine06',
       'cosine07', 'a_comp_cor_00', 'a_comp_cor_01', 'a_comp_cor_02',
       'a_comp_cor_03', 'a_comp_cor_04', 'a_comp_cor_11', 'a_comp_cor_12',
       'a_comp_cor_13', 'a_comp_cor_14', 'a_comp_cor_15'],
      dtype='object')

In [2]:
# Confound model: six rigid-body motion parameters, the cosine group,
# and the top five aCompCor components from each of the CSF and WM masks
model = dict(
    confounds=[
        'trans_x', 'trans_y', 'trans_z',
        'rot_x', 'rot_y', 'rot_z',
        'cosine',
    ],
    aCompCor=[
        dict(n_comps=5, tissue='CSF'),
        dict(n_comps=5, tissue='WM'),
    ],
)

In [3]:
model

{'confounds': ['trans_x',
  'trans_y',
  'trans_z',
  'rot_x',
  'rot_y',
  'rot_z',
  'cosine'],
 'aCompCor': [{'n_comps': 5, 'tissue': 'CSF'}, {'n_comps': 5, 'tissue': 'WM'}]}

In [30]:
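# AFNI variant of the export loop, kept for reference only: the code below
# is disabled by being wrapped in a string literal.
# NOTE(review): before re-enabling, fix `afni_pipe` (never assigned; the
# cell assigns `out_pipe` instead) and the run number in the file name,
# which is formatted without the zero padding used by the other cells.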
"""
for sub in sub_list:
    for run in range(1,tot_runs+1):
                
        file1 = os.path.join(fmri_prep, sub + "/ses-01/func")
        confounds_fn = os.path.join(fmri_prep, sub + "/ses-01/func","%s_ses-01_task-None_run-%s_desc-confounds_timeseries" % (sub, run))

        # Set an AFNI pipeline output directory (either -smooth or -nosmooth)
        out_pipe = 'afni-head_mot'
        afni_dir = join(work_dir, afni_pipe)

        model =  {'confounds':
                  ['trans_x', 'trans_y', 'trans_z',
                   'rot_x', 'rot_y', 'rot_z', 'cosine'],
                  'aCompCor': [{'n_comps': 5, 'tissue': 'CSF'},
                               {'n_comps': 5, 'tissue': 'WM'}]}

        # Loop through tasks and subjects and grab confound files

        # Make directory if it doesn't exist
        ort_dir = join(afni_dir, sub, 'func')
        if not exists(ort_dir):
            makedirs(ort_dir)

        # Grab confound files for multiple runs if present
        #confounds_fns = natsorted(
        #    task_meta[task][subject]['confounds'])

        # Loop through confound files (in case of multiple runs)
        #for confounds_fn in confounds_fns:
        confounds_df, confounds_meta = load_confounds(confounds_fn)

        # Extract confounds based on model
        confounds = extract_confounds(confounds_df,
                                      confounds_meta,
                                      model)
        
        # Create output 1D file for AFNI and save
        ort_1d = splitext(basename(confounds_fn).replace(
            'desc-confounds',
            f'desc-model'))[0] + '.1D'
        ort_fn = join(ort_dir, ort_1d)
        #confounds.to_csv(ort_fn, sep='\t', header=False,
          #               index=False)
        
        # Also create CSVs with headers for convenience
        ort_csv = splitext(basename(confounds_fn).replace(
            'desc-confounds',
            f'desc-model'))[0] + '.csv'
        ort_fn = join(ort_dir, ort_csv)
        #confounds.to_csv(ort_fn, sep=',', index=False)

        print(f"Assembled confound models for {sub}")
"""

Assembled confound models for sub-012
Assembled confound models for sub-012
Assembled confound models for sub-012
Assembled confound models for sub-012
Assembled confound models for sub-012
Assembled confound models for sub-012
Assembled confound models for sub-013
Assembled confound models for sub-013
Assembled confound models for sub-013
Assembled confound models for sub-013
Assembled confound models for sub-013
Assembled confound models for sub-013
Assembled confound models for sub-014
Assembled confound models for sub-014
Assembled confound models for sub-014
Assembled confound models for sub-014
Assembled confound models for sub-014
Assembled confound models for sub-014
Assembled confound models for sub-016
Assembled confound models for sub-016
Assembled confound models for sub-016
Assembled confound models for sub-016
Assembled confound models for sub-016
Assembled confound models for sub-016
Assembled confound models for sub-017
Assembled confound models for sub-017
Assembled co