## Calculate features from independent components (ICs)

- Edge fraction
- High frequency content
- ...
- (features from paper)

In [1]:
import os
import sys
import numpy as np
from nilearn.image import load_img, threshold_img, math_img, resample_to_img
from os.path import join, pardir
sys.path.append(pardir)
from bids import BIDSLayout
#from tqdm import tqdm



In [10]:
# Helper functions
def get_comps(metainfo_dict):
    """Load the MELODIC mixing matrix and component maps of one run.

    Parameters
    ----------
    metainfo_dict : dict
        Only the key ``'fullpath'`` is read: the directory that contains the
        files ``melodic_mix`` and ``melodic_IC.nii.gz``.

    Returns
    -------
    tuple
        ``(mixmat, comps_arr)``: the array parsed from ``melodic_mix`` by
        ``np.loadtxt`` and the data array of ``melodic_IC.nii.gz``.
    """
    melodic_dir = metainfo_dict['fullpath']
    mix_path = join(melodic_dir, 'melodic_mix')
    ic_path = join(melodic_dir, 'melodic_IC.nii.gz')

    # Text file first, image second (same order as the files are needed later).
    mixing_matrix = np.loadtxt(mix_path)
    component_maps = load_img(ic_path).get_fdata()
    return mixing_matrix, component_maps

def get_edge_mask(metainfo_dict, ds_layout, edgefrac_thickness=2):
    """Build the brain-edge mask and the brain mask for one functional run.

    A CSF image is resampled onto the run's brain mask, thresholded, and
    subtracted from the brain mask.  That mask is then eroded, and the rim
    removed by the erosion is returned as the edge mask.

    Parameters
    ----------
    metainfo_dict : dict
        Entities of the run; the keys ``'subject'``, ``'session'``, ``'run'``,
        ``'task'`` and ``'space'`` are used to query ``ds_layout``.
    ds_layout : BIDSLayout
        Layout that is queried with ``scope='fmriprep'``.
    edgefrac_thickness : int, optional
        Number of binary-erosion iterations used to carve out the edge rim.
        Defaults to 2.

    Returns
    -------
    tuple of numpy.ndarray
        ``(edgemask, brainmask)``, both converted to boolean arrays.

    Raises
    ------
    FileNotFoundError
        If one of the two layout queries matches no file.
    """
    # Imported here because the module-level imports do not provide it.
    from scipy.ndimage import binary_erosion

    # Entities shared by both file queries.
    run_query = {
        'scope': 'fmriprep',
        'return_type': 'filename',
        'subject': metainfo_dict['subject'],
        'session': metainfo_dict['session'],
        'run': metainfo_dict['run'],
        'task': metainfo_dict['task'],
        'space': metainfo_dict['space'],
        'extension': 'nii.gz',
    }
    # NOTE(review): confirm that 'brain_mask' is what the layout indexes under
    # `desc` for these files (it may be split into desc + suffix).
    brainmask_files = ds_layout.get(desc='brain_mask', **run_query)
    # NOTE(review): `acq='brain_mask'` looks like a copy-paste of the query
    # above.  An earlier draft pointed at the anatomical file
    # sub-<subject>/anat/sub-<subject>_acq-prescannormalized_rec-pydeface_label-CSF_probseg.nii.gz
    # -- confirm which entities select the CSF image in this dataset.
    csf_files = ds_layout.get(acq='brain_mask', **run_query)

    for label, matches in (('brain mask', brainmask_files), ('CSF', csf_files)):
        if not matches:
            raise FileNotFoundError(
                f"No {label} file found in the fmriprep derivatives for "
                f"subject={metainfo_dict['subject']}, session={metainfo_dict['session']}, "
                f"task={metainfo_dict['task']}, run={metainfo_dict['run']}, "
                f"space={metainfo_dict['space']}"
            )
    # The layout returns a list of file names; the image functions below are
    # given one path each, so use the first match of every query.
    brainmask_f = brainmask_files[0]
    csf_anat_f = csf_files[0]

    # Bring the CSF image onto the grid of the functional brain mask.
    csf_func = threshold_img(
        resample_to_img(csf_anat_f, brainmask_f, interpolation='linear'),
        threshold=1.
    )
    brainmask = load_img(brainmask_f).get_fdata()
    mask_img = math_img('img1 - img2', img1=brainmask_f, img2=csf_func)
    mask_arr = mask_img.get_fdata()

    # The edge is whatever the erosion removes from the mask
    # (2 iterations worked okayish).
    ero_mask = binary_erosion(mask_arr, iterations=edgefrac_thickness).astype(int)
    edgemask = mask_arr - ero_mask
    return edgemask.astype(bool), brainmask.astype(bool)


In [3]:
# Edge fraction
def calc_edgefrac(comp_arr, edgemask, brainmask):
    """Return the edge fraction of one component.

    The edge fraction is the summed absolute value of ``comp_arr`` inside
    ``edgemask`` divided by its summed absolute value inside ``brainmask``.
    Both masks are used to index ``comp_arr`` directly.
    """
    edge_weight = np.abs(comp_arr[edgemask]).sum()
    brain_weight = np.abs(comp_arr[brainmask]).sum()
    return edge_weight / brain_weight

In [4]:
# High frequency content
def calc_hfc(timeseries, tr=1.5):
    """Calculate the high frequency content (HFC) of a time series.

    The HFC is the frequency at which the cumulative periodogram power is
    closest to half of the total power, expressed as a fraction of the
    Nyquist frequency.

    Parameters
    ----------
    timeseries : array-like
        One-dimensional sequence of samples.
    tr : float, optional
        Sampling interval in seconds (the repetition time for fMRI data).
        Defaults to 1.5.

    Returns
    -------
    float
        Selected frequency divided by the Nyquist frequency.
    """
    # Imported here because the module-level imports do not provide it.
    from scipy.signal import periodogram

    sampling_rate = 1. / tr
    nyquist = sampling_rate * .5
    freqs, power = periodogram(timeseries, fs=sampling_rate)
    # Share of the total power accumulated up to each frequency bin.
    relcumsum = np.cumsum(power) / power.sum()
    # Bin whose accumulated share is closest to one half.
    freqind = np.argmin(np.absolute(relcumsum - .5))
    return freqs[freqind] / nyquist

In [5]:
# paths
# Location for result files (not used by the cells shown here).
base_dir = '/LOCAL/jzerbe/temp_results'
# Root directory of the BIDS dataset.
bidsdata_dir = '/LOCAL/jzerbe/faces_vs_houses/ds002938'
# MELODIC output directory inside the dataset's derivatives folder.
melodic_base_dir = os.path.join(bidsdata_dir, 'derivatives', 'melodic')

In [6]:
# Create the layout for the dataset at bidsdata_dir with derivatives enabled;
# later cells query it with scope='melodic' and scope='fmriprep'.
ds_layout = BIDSLayout(bidsdata_dir, derivatives=True)

In [15]:
# main
# Container for per-component result dicts (nothing is appended in this cell yet).
results_dicts = []
# All MELODIC component images (suffix 'IC') known to the layout.
melodic_entities = ds_layout.get(scope='melodic', return_type='filename', suffix='IC', extension='nii.gz')

for entity in melodic_entities:
    # cumbersome workaround to get correct filenames (TODO: better filenaming!)
    # The run's entities are encoded in the name of the directory holding the
    # IC file, as '_'-separated fields in the order
    # subject, session, task, run, space.
    melodic_dir = os.path.dirname(entity)
    dir_name = os.path.basename(melodic_dir)
    metainfo_split = dir_name.split('_')
    # Fixed-width slices strip the key prefixes ('sub-', 'ses-', 'task-',
    # 'run-', 'space-').  The space field additionally drops its last 8
    # characters -- presumably a directory-name suffix; confirm against the
    # actual directory names.
    metainfo_dict = {
        'subject': metainfo_split[0][4:],
        'session': metainfo_split[1][4:],
        'task': metainfo_split[2][5:],
        'run': metainfo_split[3][4:],
        'space': metainfo_split[4][6:-8],
        'directory': dir_name,
        'fullpath': melodic_dir,
    }
    # Missing entities are spelled as the literal string 'None' in the
    # directory name; turn them into a real None for the layout queries.
    for optional_key in ('session', 'run'):
        if metainfo_dict[optional_key] == 'None':
            metainfo_dict[optional_key] = None

    mixmat, comps_arr = get_comps(metainfo_dict)
    # get_edge_mask is a plain module-level function, so it is called directly.
    edgemask, brainmask = get_edge_mask(metainfo_dict, ds_layout)
    

In [17]:
# Debug check: show the session value left over from the last loop iteration
# (the loop replaces the literal string 'None' with None).
print(metainfo_dict['session'])

None


In [None]:
# NOTE(review): unexecuted draft cell -- it cannot run as written:
#   * `self` and `bidsobs` are not defined anywhere in the visible notebook,
#   * the `tqdm` import at the top of the notebook is commented out,
#   * `runinfo` is read on the next line before the loop below assigns it,
#   * get_edge_mask is called with one argument here, while the function
#     defined above takes (metainfo_dict, ds_layout).
# Resolve these before running the cell.
edgemask, brainmask = self.get_edge_mask(runinfo)

for bo in tqdm(bidsobs, desc='iterating over runs'): # what if there are no runs i.e. run = ''?
    # get_entities() returns all the entities associated with this file, and their values
    # ex: {'datatype': 'func',
    #      'extension': 'nii.gz',
    #      'session': 'test',
    #      'subject': '01',
    #      'suffix': 'bold',
    #      'task': 'fingerfootlips'}
    runinfo = bo.get_entities() # presumably a dict like the example above -- confirm
    
    # One result dict per component; components are indexed along the last
    # axis of mixmat and the fourth axis of comps_arr.
    for comp_i in range(mixmat.shape[-1]):
        results_dict = {'subject': runinfo['subject'], 'session': runinfo['session'], 'task': runinfo['task'],
                                'run': runinfo['run']}
        comp_arr = comps_arr[:, :, :, comp_i]
        comp_ts = mixmat[:, comp_i]
        # Calculate edge fraction
        results_dict['edgefrac'] = calc_edgefrac(comp_arr, edgemask, brainmask)
        # Calculate high frequency content (uses calc_hfc's default tr)
        results_dict['hfc'] = calc_hfc(comp_ts)
        results_dicts.append(results_dict)

