Description: This script takes the framewise displacement (FD) values found in the fmriprep output .tsv files and identifies which participants had >2 standard deviations FD above the group average for motion exclusion.

In [7]:
import os
import glob
import pandas as pd
import numpy as np

In [None]:
#DIRECTORIES FOR RUNNING FROM COMMAND LINE

In [5]:
#directories for testing

fmriprep_out = '/cifs/butler/HBN_data/TD_test_set_output'
pp_out = '/cifs/butler/HBN_data/preprocessing/postprocessing_outputs' #save .csv here

In [8]:
#define functions

def list_directories(path, *keywords):
    os.chdir(path)
    dirs = []
    for keyword in keywords:
        dirs.extend([f for f in glob.glob(keyword) if os.path.isdir(f)])
    return [os.path.basename(d) for d in dirs] #returns list of directories in given folder

def list_files(path, *keywords): #keyword is type of file (e.g., "*T1w.nii.gz*", "*bold.nii.gz*", "*fMRI_epi.nii.gz*")
    os.chdir(path)
    files = []
    for keyword in keywords:
        files.extend(glob.glob(keyword))
    return [os.path.basename(file) for file in files] #returns list of files in given directory


In [82]:
sub_directories = list_directories(fmriprep_out, "sub*")

dict_list = []

for sub in sub_directories:
    sub_dict = {'id':sub}
    tsvs = list_files(fmriprep_out + "/" + sub + "/func/","*.tsv*") #list all tsv files in func output directory (runs/tasks)
    for tsv in tsvs:
        task = tsv.partition("task-")[2] #isolate task name in tsv (after task-, before _desc)
        task = task.partition("_desc")[0]
        sub_dict[task] = 0 #add task to subject 
        
        data = pd.read_csv(fmriprep_out + "/" + sub + "/func/" + tsv, sep='\t', header=0) #read in tsv
        fd_col = data["framewise_displacement"] #isolate framewise displacement column
        fd_col = fd_col.tail(-1) #drop first row/timepoint (NaN)
        mean_fd = np.mean(fd_col) #get mean FD per task per participant
        sub_dict[task] = mean_fd #update dict
    dict_list.append(sub_dict)
    
fd_df = pd.DataFrame(dict_list) #some NANs for participants who did not complete all tasks


In [157]:
group_fds = np.mean(fd_df[1:]) #get group average FD for all tasks/columns (except ID column)
group_sds = np.std(fd_df[1:]) #get SD of average FD
upper_lims = pd.DataFrame(group_fds + 2*group_sds).T

ts = []   
for task in upper_lims: #for each task
    #print(task)
    for i,p in fd_df.iterrows(): #for each participant
        #print(p[task])
        if p[task] > float(upper_lims[task]): #check if individual FD for task if greater than upper limit of group FD for task
            #print(p["id"], task)
            t = {}
            t[task] = "outlier"
            t["id"] = p["id"]
            ts.append(t)

#assemble in single df
df = pd.DataFrame.from_records(ts)
first_column = df.pop('id') #move ID column first
df.insert(0, 'id', first_column)
df

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return std(axis=axis, dtype=dtype, out=out, ddof=ddof, **kwargs)


Unnamed: 0,id,movieDM,movieTP,peer_run-1,peer_run-2,peer_run-3,rest_run-1,rest_run-2
0,sub-NDARAC853DTE,outlier,,,,,,
1,sub-NDARAC853DTE,,outlier,,,,,
2,sub-NDARAC904DMU,,outlier,,,,,
3,sub-NDARAK019ZR6,,outlier,,,,,
4,sub-NDARAC853DTE,,,outlier,,,,
5,sub-NDARAK019ZR6,,,outlier,,,,
6,sub-NDARAC853DTE,,,,outlier,,,
7,sub-NDARAC853DTE,,,,,outlier,,
8,sub-NDARAK019ZR6,,,,,outlier,,
9,sub-NDARAX283MAK,,,,,outlier,,


In [158]:
#dataframe gives outlier participants across all tasks, manually isolate columns for individual tasks

df.to_csv(pp_out+'/motion-outliers_all.csv', sep=',', index=False)