# Generate outlier reports based on `fmriprep` outputs



--------------------


#### HISTORY

* 27/9/2021 - created by hychan

In [1]:
import os, glob, imageio
import pandas as pd
import numpy as np

from nilearn import plotting, image
from sklearn.preprocessing import scale

import warnings 
warnings.simplefilter("ignore", UserWarning)



In [2]:
auto_motion_path = '/data00/tools/auto-motion-fmriprep'

fmriprep_path = '/data00/projects/bbprime/data/bids_data/derivatives/fmriprep'
output_path = '/data00/projects/bbprime/data/bids_data/derivatives/outlier'

nii_file = '../../data/bids_data/sub-{sub}/func/sub-{sub}_task-{task}_run-{run}_bold.nii.gz'

os.makedirs(output_path, exist_ok=True)

In [3]:
rules = {
    'gs>3': lambda df: np.abs(scale(df['global_signal'])) > 3,
    'fd>0.75': lambda df: df['framewise_displacement'] > 0.75
}

In [4]:
def extract_snakecase(bids_string):
    output = {}
    for key_value in bids_string.split("_"):
        if "-" in key_value:
            key, value = key_value.split('-')
            if key != "desc":
                output[key] = value
    return output

## Automatic motion outlier detection

Remember to update `confoundDir` and `outputDir` in `config.R`

In [6]:
cmd = f'cd {auto_motion_path} && Rscript auto_motion_fmriprep.R {os.path.abspath("config.R")}'
!{cmd}

Loading required package: tidyverse
Registered S3 methods overwritten by 'tibble':
  method     from  
  format.tbl pillar
  print.tbl  pillar
── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.0 ──
[32m✔[39m [34mggplot2[39m 3.3.2     [32m✔[39m [34mpurrr  [39m 0.3.3
[32m✔[39m [34mtibble [39m 2.1.3     [32m✔[39m [34mdplyr  [39m 1.0.6
[32m✔[39m [34mtidyr  [39m 1.0.2     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.3.1     [32m✔[39m [34mforcats[39m 0.4.0
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
Loading required package: snakecase
Loading required package: caret
Loading required package: lattice

Attaching package: ‘caret’

The following object is masked from ‘package:purrr’:

    lift

Loading required pack

In [5]:
outlier = []
for reg_txt in glob.glob('../../data/bids_data/derivatives/outlier_syn/auto-motion-fmriprep/sub-*/sub-*_regressors.txt'):
    
    reg_df = pd.read_csv(reg_txt, sep='\t')
    bids_vars = extract_snakecase(os.path.basename(reg_txt))

    idx = np.where(reg_df['trash'] == 1)[0]
    
    if len(idx) > 0:

        bids_vars['outlier_type'] = 'auto'
        bids_vars['outlier_vol'] = idx

        outlier.append(pd.DataFrame(bids_vars))    

outlier = pd.concat(outlier, ignore_index=True)
outlier.to_csv(os.path.join(output_path, 'outlier_auto.csv'), index=False)

ValueError: No objects to concatenate

## Rule-based outlier detection

### Generate combined file and separate motion regressor files

In [5]:
outlier = []

for f in glob.glob(os.path.join(fmriprep_path, 'sub-*/func/*confounds_regressors.tsv')):
    print("*", end="")
    
    bn = os.path.basename(f)
    
    bids_vars = extract_snakecase(bn)
    
    confounds = pd.read_csv(f, sep="\t")   
    
    # motion regressor file
    motion_file = bn.replace("confounds", "motion")
    motion_dir = os.path.join(output_path, 'regressors', 'sub-' + bids_vars['sub'])
    
    confounds['trash'] = np.where((np.abs(scale(confounds['global_signal'])) > 3) | (confounds['framewise_displacement'] > 0.75), 1, 0)
    motion_df = confounds[["trans_x", "trans_y", "trans_z", "rot_x", "rot_y", "rot_z", "csf", "trash"]].copy()
    
    motion_df['euclidean_trans'] = np.linalg.norm(confounds[["trans_x", "trans_y", "trans_z"]], axis=1)
    motion_df['euclidean_rot'] = np.linalg.norm(confounds[["rot_x", "rot_y", "rot_z"]] * 50, axis=1)
    motion_df['euclidean_trans_deriv'] = motion_df['euclidean_trans'].diff().fillna(0)
    motion_df['euclidean_rot_deriv'] = motion_df['euclidean_rot'].diff().fillna(0)
    
    os.makedirs(motion_dir, exist_ok=True)
        
    motion_df.to_csv(os.path.join(motion_dir, motion_file), sep='\t', index=False)
    
    # combined outlier file
    for rule_name, rule_expr in rules.items():
        idx = np.where(rule_expr(confounds))[0]
        
        bids_vars['outlier_type'] = rule_name
        bids_vars['outlier_vol'] = idx
        
        outlier.append(pd.DataFrame(bids_vars))    

outlier = pd.concat(outlier, ignore_index=True)
outlier.to_csv(os.path.join(output_path, 'outlier_manual.csv'), index=False)

*****************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************

## Generate reports

In [7]:
outlier = (pd.concat([pd.read_csv(x) for x in ['outlier_auto.csv', 'outlier_manual.csv']], ignore_index=True)
            .pivot_table(index=['sub','task','run','outlier_vol'], 
                              values='outlier_type', 
                              aggfunc=lambda x: ', '.join(x))
            .reset_index())

FileNotFoundError: [Errno 2] No such file or directory: 'outlier_auto.csv'

In [None]:
os.makedirs(os.path.join(output_path, 'outlier_imgs'), exist_ok=True)

for (sub, task, run), outlier_df in outlier.groupby(['sub','task','run']):
    
    print(sub, task, run)
    
    outlier_imgs = image.index_img(nii_file.format(sub=sub, task=task, run=int(run)), outlier_df['outlier_vol'])
    
    for img, vol, otype in zip(image.iter_img(outlier_imgs), outlier_df['outlier_vol'], outlier_df['outlier_type']):
        plotting.plot_anat(anat_img=img, 
                           cut_coords=[0,0,0], 
                           output_file=f'temp{vol:04d}.png', 
                           display_mode='ortho', 
                           title=f'Vol {vol:04d} - {otype}', 
                           annotate=False, draw_cross=False, black_bg='auto', dim='auto')
    
    images = []
    for vol in outlier_df['outlier_vol']:
        images.append(imageio.imread(f'temp{vol:04d}.png'))
    imageio.mimsave(os.path.join(output_path, f'outlier_imgs/sub-{sub}_task-{task}_run-{run}.gif'), images, fps=1)
    
    for img in glob.glob('temp*.png'):
        os.remove(img)

In [None]:
outlier_summary = outlier.pivot_table(index=['sub','task','run'], values='outlier_vol', aggfunc='count').reset_index().pivot(index='sub', columns=['task','run']).fillna(0)
outlier_summary.to_csv(os.path.join(output_path, 'outlier_summary.csv'), index=False)

## Create nuisance regressor files

In [8]:
outlier = pd.read_csv(os.path.join(output_path, 'outlier_manual.csv'))

In [9]:
outlier = []

for f in glob.glob(os.path.join(fmriprep_path, 'sub-*/func/*confounds_regressors.tsv')):
    print("*", end="")

************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************