In [1]:
from pathlib import Path
import shutil
from bids import BIDSLayout
import pandas as pd
import numpy as np
from io import StringIO
import re
import nibabel as nb
#from nilearn import image as nli
from subprocess import run, PIPE
from collections import namedtuple
import json
from matplotlib import pyplot as plt
from joblib import Parallel, delayed, parallel_backend
%matplotlib inline

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', 500)
pd.set_option('display.width', 1000)

from IPython.core.display import display, HTML
display(HTML("<style>"
    + "#notebook { padding-top:0px !important; } " 
    + ".container { width:100% !important; } "
    + ".end_space { min-height:0px !important; } "
    + "</style>"))

  from IPython.core.display import display, HTML


In [2]:
project_root = Path('..')
bids_dir = project_root / 'data/bids_defaced'
derivatives_dir = project_root / 'data/derivatives'
swarm_cmd_dir = project_root / 'swarm/fmriprep/swarm_cmds'
# sing_img_dir = Path('/data/MBDU/singularity_images/')
# image_path = (sing_img_dir/'fmriprep_v21.0.0.simg').as_posix()
# fs_licence_path = sing_img_dir/'license.txt'
# assert fs_licence_path.exists()
# fs_licence_path = fs_licence_path.as_posix()

# change this for other fmriprep runs
#run_name = 'rn_aroma'
#fmriprep_out = derivatives_dir / 'fmriprep' / run_name

#cmd_file = swarm_cmd_dir / run_name
#swarm_log_dir = project_root / ('swarm/fmriprep/swarm_logs' + run_name)
jobids = {}

In [3]:
# change this for other fmriprep runs
#run_name = 'fmriprepv20.2.2_complete_rerun'
run_name = 'fmriprep_v21.0.0'
fmriprep_out = derivatives_dir / 'fmriprep' / run_name
toc_dir = derivatives_dir / 'fitlins' / run_name
fmriprep_out.exists()

True

In [5]:
database_path='/data/MBDU/mid_share/notebooks/pybids140'
%time layout = BIDSLayout(bids_dir, database_path=database_path)

CPU times: user 17.4 ms, sys: 3.94 ms, total: 21.4 ms
Wall time: 125 ms


In [6]:
# subs = !ls /data/MBDU/mid_share/data/derivatives/fmriprep/fmriprepv20.2.2/
# subs = subs[1:]
subs = layout.get(return_type='id', target='subject', suffix='T1w')

In [7]:
preproc_glob = 'sub-*/out/sub-*/ses-*/func/sub-*_ses-*_task-*_run-*_space-MNI152NLin2009cAsym_res-2_desc-preproc_bold.nii.gz'
bold_glob = 'sub-*/ses-*/func/sub-*_ses-*_task-*_run-*_bold.nii.gz'

In [8]:
preproced = sorted(fmriprep_out.glob(preproc_glob))
ppdf = []
for pp in preproced:
    row = {}
    ents = layout.parse_file_entities(pp.as_posix())
    row.update(ents)
    row['entities'] = ents
    row['file'] = pp
    ppdf.append(row)
ppdf = pd.DataFrame(ppdf)

In [10]:
raw = sorted(bids_dir.glob(bold_glob))
rwdf = []
for pp in raw:
    row = layout.parse_file_entities(pp.as_posix())
    row['file'] = pp
    if 'echo' in row.keys():
        if row['echo'] == '1':
            rwdf.append(row)
    else:
        rwdf.append(row)
rwdf = pd.DataFrame(rwdf)

In [11]:
missing_subs = []
for sub in subs:
    sn = sub.split('-')[-1]
    if not sn in ppdf.subject.values:
        missing_subs.append(sub)

In [12]:
missing_subs

['24085']

In [14]:
missing_sns = [ms.split("-")[-1] for ms in missing_subs]

In [15]:
alldf = rwdf.merge(ppdf, how='outer', on=['subject', 'session', 'task', 'run', 'datatype', 'extension'],
           suffixes=['', '_pp'], indicator=True)

In [16]:
partial_sns = alldf.query("_merge != 'both' & subject not in @missing_sns").subject.values

In [17]:
partial_sns

array(['23652', '23652', '23660', '23660', '23660', '23660', '23660',
       '23660'], dtype=object)

# add outlier and motion censoring columns to confounds file

In [18]:
def add_toc(row, n_dummy=4, fd_limit=1.0, outlier_fraction_limit=0.1):
    
    cfd = pd.read_csv(row.cfd_new_path, sep='\t')
    #cfd_bak_path = row.cfd_path.replace('regressors.tsv', 'bak.bak')
    #cfd_bak_path = Path(str(ppdf.cfd_path[0]).replace('timeseries.tsv','bak.bak'))
    #cfd.to_csv(cfd_bak_path, index=False, na_rep='n/a', sep='\t')
    if row.toc_path.exists():
        toc = np.array([int(ll.strip()) for ll in row.toc_path.read_text().split('\n')[:-1]])
    else:
        raise FileNotFoundError(row.toc_path)
    toc_frac = toc / row.ms
    assert len(toc) == row.nv

    cfd['temporal_outlier_fraction'] = toc_frac
    cfd['censored'] = (cfd.temporal_outlier_fraction > outlier_fraction_limit) | (cfd.framewise_displacement > fd_limit)
    cfd.censored = cfd.censored.astype(int)
    # censor dummy scans
    cfd.loc[:n_dummy - 1, 'censored'] = 1
    
    # drop previous censor columns
    cfd = cfd.loc[:, ~cfd.columns.str.contains('censor_')]
    # make censor columns
    c_cols = np.zeros((len(cfd.censored),len(cfd.censored)) ).astype(int)
    c_cols[np.diag_indices(len(cfd.censored))] = cfd.censored.values.astype(int)
    c_cols_names = [f'censor_{nn:03d}' for nn in range(cfd.censored.sum())]
    c_cols = pd.DataFrame(c_cols[:, cfd.censored.astype(bool)],
                          columns=c_cols_names)
    cfd = pd.concat([cfd, c_cols], axis=1)
    cfd = cfd.fillna(0)

    cfd.to_csv(row.cfd_new_path, index=False, na_rep='n/a', sep='\t')
    

def make_cens_cols(df, column):
    df = df.copy()
    df[column] = df[column].astype(int)
    c_cols = np.zeros((len(df[column]),len(df[column])) ).astype(int)
    c_cols[np.diag_indices(len(df[column]))] = df[column].values.astype(int)
    c_cols_names = [f'{column}_{nn:03d}' for nn in range(df[column].sum())]
    c_cols = pd.DataFrame(c_cols[:, df[column].astype(bool)],
                          columns=c_cols_names)
    df = pd.concat([df, c_cols], axis=1)
    df = df.fillna(0)
    return df

def expand_censor(row, fd_limit=1.0):
    cfd = pd.read_csv(row.cfd_new_path, sep='\t')
    cfd = cfd.drop(cfd.columns[cfd.columns.str.contains('fdplus1_')], axis=1, errors='ignore')
    cfd = cfd.drop(cfd.columns[cfd.columns.str.contains('strictfd_')], axis=1, errors='ignore')
    cfd = cfd.drop(cfd.columns[cfd.columns.str.contains('strictfdplus1_')], axis=1, errors='ignore')
    cfd = cfd.drop(cfd.columns[cfd.columns.str.contains('censfdplus1_')], axis=1, errors='ignore')
    cfd['prev_fd'] = np.nan
    cfd.loc[1:, 'prev_fd'] = cfd.iloc[:-1].loc[:, 'framewise_displacement'].values
    assert cfd.columns.str.contains('censfdplus1_').sum() == 0
    assert cfd.columns.str.contains('fdplus1_').sum() == 0
    assert cfd.columns.str.contains('strictfd_').sum() == 0
    assert cfd.columns.str.contains('strictfdplus1_').sum() == 0
    
    cfd['censfdplus1'] = (cfd.prev_fd > fd_limit) & ~(cfd.censored == 1)
    cfd = make_cens_cols(cfd, 'censfdplus1')
    cfd['strictfd'] = (cfd.framewise_displacement > (fd_limit/2)) & ~(cfd.censored == 1)
    cfd = make_cens_cols(cfd, 'strictfd')
    cfd['strictfdplus1'] = (cfd.prev_fd > (fd_limit/2)) & ~(cfd.censored == 1) & ~(cfd.strictfd == 1)
    cfd = make_cens_cols(cfd, 'strictfdplus1')
    cfd = cfd.drop(['censfdplus1', 'strictfd', 'strictfdplus1'], axis=1)
    
    cfd.to_csv(row.cfd_new_path, index=False, na_rep='n/a', sep='\t')

def calc_censored(df, prefix):
    ccols = df.columns[df.columns.str.contains(prefix)]
    tmp = df.loc[:, ccols]
    return ((tmp == 1).sum(0) == 1).sum()

def get_censored_cols(df, prefixes):
    if isinstance(prefixes, str):
        prefixes = [prefixes]
    ccols_ind = np.array([False]*len(cfd.columns))
    for pf in prefixes:
        ccols_ind = ccols_ind | df.columns.str.contains(pf)
    ccols = df.columns[ccols_ind]
    return ccols

In [19]:
def make_path(ents, updates, pattern, derivatives_dir, build_path, check_exist=True, derivatives=True):
    mp_ents = ents.copy()
    if derivatives:
        mp_dir = fmriprep_out / f'sub-{ents["subject"]}/out'
    else:
        mp_dir = derivatives_dir 
    mp_ents.update(updates)
    mp_file = mp_dir / build_path(mp_ents, pattern, validate=False, absolute_paths=False)
    if check_exist and not mp_file.exists():
        raise FileNotFoundError(mp_file.as_posix())
    else:
        assert mp_file.parent.exists()
    return mp_file

In [20]:
# add toc_file path to ppdf
toc_pattern = 'sub-{subject}/ses-{session}/func/sub-{subject}_ses-{session}_task-{task}_run-{run}_{suffix}.{extension}'
toc_updates = {'suffix':'toutcount', 'extension':'txt'}
ppdf['toc_path'] = ppdf.entities.apply(lambda x: make_path(x, toc_updates, toc_pattern, derivatives_dir, layout.build_path, check_exist=False))

# add mask_file path to ppdf
img_pattern = 'sub-{subject}/ses-{session}/func/sub-{subject}_ses-{session}_task-{task}_run-{run}_space-{space}_res-{res}_desc-{desc}_{suffix}.{extension}'
mask_updates = {'desc':'brain', 'suffix':'mask', 'extension':'nii.gz', 'res': 2}
ppdf['mask_path'] = ppdf.entities.apply(lambda x: make_path(x,
                                                            mask_updates,
                                                            img_pattern,
                                                            derivatives_dir,
                                                            layout.build_path,
                                                            check_exist=False))
ppdf['mask_exists'] = ppdf.mask_path.apply(lambda x: x.exists())

#Drop runs without a brain mask
print("Can't find a brain mask for these.")
print(ppdf.loc[~ppdf.mask_exists, 'entities'])
ppdf = ppdf.loc[ppdf.mask_exists].copy()
ppdf = ppdf.drop('mask_exists', axis=1)

Can't find a brain mask for these.
473     {'subject': '23638', 'session': 'i3', 'task': 'mid', 'run': 1, 'space': 'MNI152NLin2009cAsym', 'suffix': 'bold', 'datatype': 'func', 'extension': '.nii.gz'}
557    {'subject': '23660', 'session': 'i15', 'task': 'mid', 'run': 1, 'space': 'MNI152NLin2009cAsym', 'suffix': 'bold', 'datatype': 'func', 'extension': '.nii.gz'}
Name: entities, dtype: object


In [21]:
# add tsnr path
tsnr_updates = {'desc':'tsnr', 'suffix':'bold', 'extension':'nii.gz', 'res':2}
ppdf['tsnr_path'] = ppdf.entities.apply(lambda x: make_path(x,
                                                            tsnr_updates,
                                                            img_pattern,
                                                            derivatives_dir,
                                                            layout.build_path,
                                                            check_exist=False))
# add events path
event_pattern = 'sub-{subject}/ses-{session}/func/sub-{subject}_ses-{session}_task-{task}_run-{run}_{suffix}.{extension}'
event_updates = {'suffix':'events', 'extension':'tsv'}
ppdf['events_path'] = ppdf.entities.apply(lambda x: make_path(x,
                                                            event_updates,
                                                            event_pattern,
                                                            bids_dir,
                                                            layout.build_path,
                                                            check_exist=False,
                                                             derivatives=False))

cfd_pattern = 'sub-{subject}/ses-{session}/func/sub-{subject}_ses-{session}_task-{task}_run-{run}_desc-{desc}_{suffix}.{extension}'
# add phys1d path
phys1d_updates = {'desc': 'physregs', 'suffix':'timeseries', 'extension':'slibase.1D'}
ppdf['phys1d_path'] = ppdf.entities.apply(lambda x: make_path(x,
                                                              phys1d_updates,
                                                            cfd_pattern,
                                                            derivatives_dir,
                                                            layout.build_path,
                                                            check_exist=False,
                                                             derivatives=True))
# cheat to get the img_meta_path
ppdf['imgmeta_path'] = ppdf.file.apply(lambda x: Path(x.as_posix().replace(".nii.gz", '.json')))
                
# add phys path
phys_updates = ({'suffix':'physio', 'extension':'tsv.gz'})
ppdf['phys_path'] = ppdf.entities.apply(lambda x: make_path(x,
                                                            phys_updates,
                                                            event_pattern,
                                                            bids_dir,
                                                            layout.build_path,
                                                            check_exist=False,
                                                            derivatives=False))
# add phys meta path
physmeta_updates = ({'suffix':'physio', 'extension':'json'})
ppdf['physmeta_path'] = ppdf.entities.apply(lambda x: make_path(x,
                                                            physmeta_updates,
                                                            event_pattern,
                                                            bids_dir,
                                                            layout.build_path,
                                                            check_exist=False,
                                                            derivatives=False))

# add confounds file
confounds_updates = {'desc': 'confounds', 'suffix':'timeseries', 'extension': 'tsv'}
ppdf['cfd_orig_path'] = ppdf.entities.apply(lambda x: make_path(x,
                                                              confounds_updates,
                                                            cfd_pattern,
                                                            derivatives_dir,
                                                            layout.build_path,
                                                            check_exist=False,
                                                             derivatives=True))

ppdf['cfd_orig_exists'] = ppdf.cfd_orig_path.apply(lambda x: x.exists())

#Drop runs without a confounds file
print("Can't find a confounds file for these.")
print(ppdf.loc[~ppdf.cfd_orig_exists, 'entities'])
ppdf = ppdf.loc[ppdf.cfd_orig_exists].copy()
ppdf = ppdf.drop('cfd_orig_exists', axis=1)

Can't find a confounds file for these.
308       {'subject': '23546', 'session': 'i3', 'task': 'rest', 'run': 1, 'space': 'MNI152NLin2009cAsym', 'suffix': 'bold', 'datatype': 'func', 'extension': '.nii.gz'}
474       {'subject': '23638', 'session': 'i3', 'task': 'rest', 'run': 1, 'space': 'MNI152NLin2009cAsym', 'suffix': 'bold', 'datatype': 'func', 'extension': '.nii.gz'}
475        {'subject': '23638', 'session': 'i4', 'task': 'mid', 'run': 1, 'space': 'MNI152NLin2009cAsym', 'suffix': 'bold', 'datatype': 'func', 'extension': '.nii.gz'}
476       {'subject': '23638', 'session': 'i4', 'task': 'rest', 'run': 1, 'space': 'MNI152NLin2009cAsym', 'suffix': 'bold', 'datatype': 'func', 'extension': '.nii.gz'}
477        {'subject': '23638', 'session': 'i5', 'task': 'mid', 'run': 1, 'space': 'MNI152NLin2009cAsym', 'suffix': 'bold', 'datatype': 'func', 'extension': '.nii.gz'}
478       {'subject': '23638', 'session': 'i5', 'task': 'rest', 'run': 1, 'space': 'MNI152NLin2009cAsym', 'suffix': 'bold

In [22]:
confounds_updates = {'desc': 'confounds', 'suffix':'regressors', 'extension': 'tsv'}
ppdf['cfd_new_path'] = ppdf.entities.apply(lambda x: make_path(x,
                                                              confounds_updates,
                                                            cfd_pattern,
                                                            derivatives_dir,
                                                            layout.build_path,
                                                            check_exist=False,
                                                             derivatives=True))

confoundsmeta_updates = {'desc': 'confounds', 'suffix':'timeseries', 'extension': 'json'}
ppdf['cfdjson_orig_path'] = ppdf.entities.apply(lambda x: make_path(x,
                                                              confoundsmeta_updates,
                                                            cfd_pattern,
                                                            derivatives_dir,
                                                            layout.build_path,
                                                            check_exist=True,
                                                             derivatives=True))

confoundsmeta_updates = {'desc': 'confounds', 'suffix':'regressors', 'extension': 'json'}
ppdf['cfdjson_new_path'] = ppdf.entities.apply(lambda x: make_path(x,
                                                              confoundsmeta_updates,
                                                            cfd_pattern,
                                                            derivatives_dir,
                                                            layout.build_path,
                                                            check_exist=False,
                                                             derivatives=True))

In [23]:
ppdf['nv'] = ppdf.file.apply(lambda x: nb.load(x.as_posix()).get_fdata().shape[-1])
ppdf['ms'] = ppdf.mask_path.apply(lambda x: nb.load(x.as_posix()).get_fdata().sum())

In [24]:
ppdf['has_phys'] = ppdf.phys_path.apply(lambda x: x.exists())

In [25]:
no_events = ~ppdf.loc[ppdf.task != 'rest', 'events_path'].apply(lambda x: x.exists())
ppdf.loc[ppdf.task != 'rest'].loc[no_events].shape

(17, 25)

In [26]:
ppdf.loc[ppdf.task != 'rest'].shape

(654, 25)

## Create swarm to run 3dtoutcout

In [27]:
def make_toc_cmd(row):
    return f'module load afni; 3dToutcount -polort 2 -mask {row.mask_path} {row.file} > {row.toc_path}'

In [28]:
toc_cmds = list(ppdf.apply(lambda row: make_toc_cmd(row), axis=1).values)

In [29]:
run_name = 'toc'
swarm_cmd_dir = project_root / 'swarm/fitlins/swarm_cmds'
swarm_cmd_dir.mkdir(exist_ok=True, parents=True)
cmd_file = swarm_cmd_dir / run_name
swarm_log_dir = project_root / 'swarm/fitlins/swarm_logs' / run_name
swarm_log_dir.mkdir(exist_ok=True, parents=True)
cmd_file = Path(cmd_file.as_posix())
cmd_file.write_text('\n'.join(toc_cmds))
tmp = cmd_file.read_text().split('\n')
for i, tt in enumerate(tmp):
    print(tt)
    if i > 5:
        break

module load afni; 3dToutcount -polort 2 -mask /data/MBDU/mid_share/data/derivatives/fmriprep/fmriprep_v21.0.0/sub-20900/out/sub-20900/ses-v1/func/sub-20900_ses-v1_task-mid_run-1_space-MNI152NLin2009cAsym_res-2_desc-brain_mask.nii.gz /data/MBDU/mid_share/data/derivatives/fmriprep/fmriprep_v21.0.0/sub-20900/out/sub-20900/ses-v1/func/sub-20900_ses-v1_task-mid_run-1_space-MNI152NLin2009cAsym_res-2_desc-preproc_bold.nii.gz > /data/MBDU/mid_share/data/derivatives/fmriprep/fmriprep_v21.0.0/sub-20900/out/sub-20900/ses-v1/func/sub-20900_ses-v1_task-mid_run-1_toutcount.txt
module load afni; 3dToutcount -polort 2 -mask /data/MBDU/mid_share/data/derivatives/fmriprep/fmriprep_v21.0.0/sub-20900/out/sub-20900/ses-v1/func/sub-20900_ses-v1_task-rest_run-1_space-MNI152NLin2009cAsym_res-2_desc-brain_mask.nii.gz /data/MBDU/mid_share/data/derivatives/fmriprep/fmriprep_v21.0.0/sub-20900/out/sub-20900/ses-v1/func/sub-20900_ses-v1_task-rest_run-1_space-MNI152NLin2009cAsym_res-2_desc-preproc_bold.nii.gz > /dat

In [30]:
jobids[run_name] = ! swarm -f {cmd_file} --gres=lscratch:100 -g 10 -t 2 --module afni --time 1:00:00 --logdir {swarm_log_dir} --job-name {run_name} --partition quick,norm
jobids[run_name] = jobids[run_name][0]
jobids[run_name]

'40418225'

# Create swarm to run tsnr command

In [31]:
def make_tsnr_cmd(row, n_dummy):
    img_file_select = row.file.as_posix() + f'[{n_dummy}..$]'
    return f'module load afni; 3dTstat -prefix {row.tsnr_path} -tsnr {img_file_select} -overwrite'

In [32]:
tsnr_cmds = list(ppdf.apply(lambda row: make_tsnr_cmd(row, 4), axis=1).values)

In [33]:
run_name = 'tsnr'
swarm_cmd_dir = project_root / 'swarm/fitlins/swarm_cmds'
swarm_cmd_dir.mkdir(exist_ok=True, parents=True)
cmd_file = swarm_cmd_dir / run_name
swarm_log_dir = project_root / 'swarm/fitlins/swarm_logs' / run_name
swarm_log_dir.mkdir(exist_ok=True, parents=True)
cmd_file = Path(cmd_file.as_posix())
cmd_file.write_text('\n'.join(tsnr_cmds))
tmp = cmd_file.read_text().split('\n')
for i, tt in enumerate(tmp):
    print(tt)
    if i > 5:
        break

module load afni; 3dTstat -prefix /data/MBDU/mid_share/data/derivatives/fmriprep/fmriprep_v21.0.0/sub-20900/out/sub-20900/ses-v1/func/sub-20900_ses-v1_task-mid_run-1_space-MNI152NLin2009cAsym_res-2_desc-tsnr_bold.nii.gz -tsnr /data/MBDU/mid_share/data/derivatives/fmriprep/fmriprep_v21.0.0/sub-20900/out/sub-20900/ses-v1/func/sub-20900_ses-v1_task-mid_run-1_space-MNI152NLin2009cAsym_res-2_desc-preproc_bold.nii.gz[4..$] -overwrite
module load afni; 3dTstat -prefix /data/MBDU/mid_share/data/derivatives/fmriprep/fmriprep_v21.0.0/sub-20900/out/sub-20900/ses-v1/func/sub-20900_ses-v1_task-rest_run-1_space-MNI152NLin2009cAsym_res-2_desc-tsnr_bold.nii.gz -tsnr /data/MBDU/mid_share/data/derivatives/fmriprep/fmriprep_v21.0.0/sub-20900/out/sub-20900/ses-v1/func/sub-20900_ses-v1_task-rest_run-1_space-MNI152NLin2009cAsym_res-2_desc-preproc_bold.nii.gz[4..$] -overwrite
module load afni; 3dTstat -prefix /data/MBDU/mid_share/data/derivatives/fmriprep/fmriprep_v21.0.0/sub-20900/out/sub-20900/ses-v4/func/

In [34]:
jobids[run_name] = ! swarm -f {cmd_file} --gres=lscratch:100 -g 10 -t 2 --module afni --time 1:00:00 --logdir {swarm_log_dir} --job-name {run_name} --partition quick,norm
jobids[run_name] = jobids[run_name][0]
jobids[run_name]

'40418566'

# Retrots the physio and put it in the confounds file

In [68]:
def make_retrots_command(row):
    tr = json.loads(row.imgmeta_path.read_text())['RepetitionTime']
    slice_list = [int((tr/2))]
    sf = json.loads(row.physmeta_path.read_text())['SamplingFrequency']
    cmd = f'module load afni; /gpfs/gsfs11/users/MBDU/mid_share/env/bin/python /usr/local/apps/afni/current/linux_centos_7_64/RetroTS.py  -phys_file {row.phys_path} -p {sf} -n 1 -v {tr} -slice_order "custom" -slice_offset {slice_list} -prefix {row.phys1d_path.as_posix()[:-11]} '
    return cmd

In [69]:
retrots_cmds = list(ppdf.loc[ppdf.has_phys].apply(lambda row: make_retrots_command(row), axis=1).values)

In [72]:
run_name = 'retrots'
swarm_cmd_dir = project_root / 'swarm/fitlins/swarm_cmds'
swarm_cmd_dir.mkdir(exist_ok=True, parents=True)
cmd_file = swarm_cmd_dir / run_name
swarm_log_dir = project_root / 'swarm/fitlins/swarm_logs' / run_name
swarm_log_dir.mkdir(exist_ok=True, parents=True)
cmd_file = Path(cmd_file.as_posix())
cmd_file.write_text('\n'.join(retrots_cmds[5:]))
tmp = cmd_file.read_text().split('\n')
for i, tt in enumerate(tmp):
    print(tt)
    if i > 5:
        break

module load afni; /gpfs/gsfs11/users/MBDU/mid_share/env/bin/python /usr/local/apps/afni/current/linux_centos_7_64/RetroTS.py  -phys_file /data/MBDU/mid_share/data/bids_defaced/sub-21111/ses-v1/func/sub-21111_ses-v1_task-rest_run-1_physio.tsv.gz -p 50 -n 1 -v 2.5 -slice_order "custom" -slice_offset [1] -prefix /data/MBDU/mid_share/data/derivatives/fmriprep/fmriprep_v21.0.0/sub-21111/out/sub-21111/ses-v1/func/sub-21111_ses-v1_task-rest_run-1_desc-physregs_timeseries 
module load afni; /gpfs/gsfs11/users/MBDU/mid_share/env/bin/python /usr/local/apps/afni/current/linux_centos_7_64/RetroTS.py  -phys_file /data/MBDU/mid_share/data/bids_defaced/sub-21111/ses-v5/func/sub-21111_ses-v5_task-mid_run-1_physio.tsv.gz -p 50 -n 1 -v 2 -slice_order "custom" -slice_offset [1] -prefix /data/MBDU/mid_share/data/derivatives/fmriprep/fmriprep_v21.0.0/sub-21111/out/sub-21111/ses-v5/func/sub-21111_ses-v5_task-mid_run-1_desc-physregs_timeseries 
module load afni; /gpfs/gsfs11/users/MBDU/mid_share/env/bin/pyth

In [73]:
jobids[run_name] = ! swarm -f {cmd_file} --gres=lscratch:100 -g 10 -t 2 --module afni --time 1:00:00 --logdir {swarm_log_dir} --job-name {run_name} --partition quick,norm
jobids[run_name] = jobids[run_name][0]
jobids[run_name]

'40431117'

In [78]:
def add_physioto_confounds(row):
    phys_cols = ["phys_RVT0", "phys_RVT1", "phys_RVT2", "phys_RVT3", "phys_RVT4",
             "phys_Resp0", "phys_Resp1", "phys_Resp2", "phys_Resp3",
             "phys_Card0", "phys_Card1", "phys_Card2", "phys_Card3"]
    if row.phys1d_path.exists():
        phys_dat = pd.read_csv(row.phys1d_path, delim_whitespace=True, comment='#', names=phys_cols)
        old_cfd = pd.read_csv(row.cfd_orig_path, sep='\t')
        old_cfd = old_cfd.drop(phys_cols, axis=1, errors='ignore')
        old_cfd.iloc[0] = old_cfd.iloc[0].fillna(0)
        if phys_cols[0] not in old_cfd.columns:
            cfd = pd.concat([old_cfd, phys_dat], axis=1).loc[old_cfd.index]
            # make sure none of the original confound data gets changed
            assert (cfd.loc[:, old_cfd.columns].fillna(0) == old_cfd.fillna(0)).all().all()
            cfd.to_csv(row.cfd_new_path, index=False, na_rep='n/a', sep='\t')
        else:
            old_cfd.to_csv(row.cfd_new_path, index=False, na_rep='n/a', sep='\t')
    else:
        old_cfd = pd.read_csv(row.cfd_orig_path, sep='\t')
        old_cfd.to_csv(row.cfd_new_path, index=False, na_rep='n/a', sep='\t')

In [79]:
def add_new_jsons(row):
    old_json = json.load(open(row.cfdjson_orig_path))
    with open(row.cfdjson_new_path, "w") as outfile: json.dump(old_json,outfile)

In [80]:
ppdf.apply(add_physioto_confounds, axis=1)

0       None
1       None
2       None
3       None
4       None
        ... 
1324    None
1325    None
1326    None
1327    None
1328    None
Length: 1288, dtype: object

In [81]:
ppdf.apply(add_new_jsons, axis=1)

0       None
1       None
2       None
3       None
4       None
        ... 
1324    None
1325    None
1326    None
1327    None
1328    None
Length: 1288, dtype: object

In [98]:
# add outlier and motion censoring columns to confounds file
# run retrots
# put retrots outputs in confounds file
# tsnr images
# write model

# add censoring columns to confounds file

In [82]:
ppdf.apply(add_toc, axis=1)
ppdf.apply(expand_censor, axis=1)

0       None
1       None
2       None
3       None
4       None
        ... 
1324    None
1325    None
1326    None
1327    None
1328    None
Length: 1288, dtype: object

In [68]:
run_name

'retrots'

In [83]:
ppdf.to_csv(str(derivatives_dir/'summary_tables'/'prep_df.csv'),index=False)

# Save out database with prepped derivatives

In [176]:
#!rm -r '/data/MBDU/mid_share/notebooks/pybids131_with_derivs'

In [23]:
database_path='./pybids140_with_derivs'
%time layout = BIDSLayout(bids_dir, database_path=database_path, derivatives=fmriprep_out)

CPU times: user 14min 1s, sys: 3min 12s, total: 17min 14s
Wall time: 25min 8s
