# Feb 5, 2025: Collect run-wise ROI time series into a single folder

In [1]:
import os
import glob
import pandas as pd
import re
import numpy as np
from tqdm import tqdm
import nibabel as nib
from nilearn.input_data import NiftiLabelsMasker
from joblib import Parallel, delayed

In [2]:
class ARGS:
    """Empty attribute container; notebook settings are attached to an instance."""


# Single shared settings object used by the cells below.
args = ARGS()
args.SEED = 100

# Seed NumPy's legacy global RNG with the configured value.
np.random.seed(args.SEED)

In [3]:
# Dataset root lives directly under the user's home directory.
BASE_path = '{}/new_mouse_dataset'.format(os.environ["HOME"])
# Parcellation images are read from this sub-folder.
PARCELS_path = BASE_path + '/parcels'

In [4]:
# Parcellation settings; together they select which parcels file is used.
args.source = 'allen'
args.space = 'ccfv2'
args.brain_div = 'whl'
args.num_rois = 172  # other ROI counts noted alongside this setting: 216, 334, 162
args.resolution = 200

# Key-value descriptor string shared by the parcellation file name and the
# output folder name.
PARC_DESC = '_'.join([
    f'source-{args.source}',
    f'space-{args.space}',
    f'braindiv-{args.brain_div}',
    f'nrois-{args.num_rois}',
    f'res-{args.resolution}',
])
PARC_DESC

'source-allen_space-ccfv2_braindiv-whl_nrois-172_res-200'

In [5]:
# Path of the parcellation image matching the descriptor chosen above.
parcels_file = '{}/{}_desc-parcels.nii.gz'.format(PARCELS_path, PARC_DESC)
# Load the image with nibabel and pull its voxel data into an array.
parcels_img = nib.load(parcels_file)
parcels = parcels_img.get_fdata()
# Disabled: loading ROI names from the companion CSV.
# roi_labels = pd.read_csv(f'{PARCELS_path}/{PARC_DESC}_desc-names.csv')['roi'].to_numpy()

In [6]:
# Output layout: <BASE_path>/roi-results-v3/<PARC_DESC>/roi-timeseries
ROI_path = (
    f'{BASE_path}/roi-results-v3'
    f'/{PARC_DESC}'
)
TS_path = f'{ROI_path}/roi-timeseries'

# Create the folders with os.makedirs instead of shelling out to `mkdir -p`:
# it is safe for paths containing spaces or shell metacharacters, works on any
# platform, and raises an exception on failure instead of returning an exit
# code that nobody checks. exist_ok=True keeps the cell idempotent on re-run.
os.makedirs(ROI_path, exist_ok=True)
os.makedirs(TS_path, exist_ok=True)

0

In [7]:
# Folder holding the resting-state fMRI runs registered to the chosen space.
DATA_path = '/'.join([
    f'{BASE_path}/space-{args.space}_desc-data',
    'fMRI/SLC/REST',
])
DATA_path

# Collect every NIfTI file inside the `session*gsr` folders, in sorted order.
# Note: recursive=True has no effect here because the pattern contains no '**'.
data_files = glob.glob(f'{DATA_path}/session*gsr/*.nii.gz', recursive=True)
data_files.sort()
data_files

['/home/govindas/new_mouse_dataset/space-ccfv2_desc-data/fMRI/SLC/REST/session1_gsr/sub-SLC01_ses-1_task-rest_acq-EPI_run-11_bold_LPS_combined_cleaned2Allen.nii.gz',
 '/home/govindas/new_mouse_dataset/space-ccfv2_desc-data/fMRI/SLC/REST/session1_gsr/sub-SLC01_ses-1_task-rest_acq-EPI_run-15_bold_LPS_combined_cleaned2Allen.nii.gz',
 '/home/govindas/new_mouse_dataset/space-ccfv2_desc-data/fMRI/SLC/REST/session1_gsr/sub-SLC01_ses-1_task-rest_acq-EPI_run-19_bold_LPS_combined_cleaned2Allen.nii.gz',
 '/home/govindas/new_mouse_dataset/space-ccfv2_desc-data/fMRI/SLC/REST/session1_gsr/sub-SLC03_ses-1_task-rest_acq-EPI_run-15_bold_LPS_combined_cleaned2Allen.nii.gz',
 '/home/govindas/new_mouse_dataset/space-ccfv2_desc-data/fMRI/SLC/REST/session1_gsr/sub-SLC03_ses-1_task-rest_acq-EPI_run-19_bold_LPS_combined_cleaned2Allen.nii.gz',
 '/home/govindas/new_mouse_dataset/space-ccfv2_desc-data/fMRI/SLC/REST/session1_gsr/sub-SLC03_ses-1_task-rest_acq-EPI_run-23_bold_LPS_combined_cleaned2Allen.nii.gz',
 '/h

In [8]:
# File-name parser; the adjacent raw literals concatenate into one regex.
# Named groups: sub (subject), ses (session), task, run.
pattern = re.compile(
    r"sub-(?P<sub>\w+)"
    r"_ses-(?P<ses>\d+)"
    r"_task-(?P<task>\w+)"
    r"_.*_run-(?P<run>\d+)"
    r"_bold_.*\.nii\.gz"
)
pattern

re.compile(r'sub-(?P<sub>\w+)_ses-(?P<ses>\d+)_task-(?P<task>\w+)_.*_run-(?P<run>\d+)_bold_.*\.nii\.gz',
           re.UNICODE)

In [9]:
# Build one metadata record per data file by parsing its base name with
# `pattern`. Records are gathered in a plain list and converted to a DataFrame
# once, so `data_df` only ever refers to the finished table.
records = []
for data_file in tqdm(data_files):
    file_name = os.path.basename(data_file)
    match = pattern.match(file_name)
    if match is None:
        # Fail with the offending file name rather than an opaque
        # "'NoneType' object has no attribute 'groupdict'".
        raise ValueError(
            f'file name does not match the expected naming pattern: {data_file}'
        )
    # e.g. {'sub': 'SLC04', 'ses': '2', 'task': 'rest', 'run': '17', 'file': ...}
    metadata = {**match.groupdict(), 'file': data_file}
    records.append(metadata)

# Explicit columns keep the schema stable even when no files were found.
data_df = pd.DataFrame(records, columns=['sub', 'ses', 'task', 'run', 'file'])

100%|██████████| 86/86 [00:00<00:00, 176992.22it/s]


In [10]:
# Display the metadata table (one row per data file: sub, ses, task, run, file).
data_df

Unnamed: 0,sub,ses,task,run,file
0,SLC01,1,rest,11,/home/govindas/new_mouse_dataset/space-ccfv2_d...
1,SLC01,1,rest,15,/home/govindas/new_mouse_dataset/space-ccfv2_d...
2,SLC01,1,rest,19,/home/govindas/new_mouse_dataset/space-ccfv2_d...
3,SLC03,1,rest,15,/home/govindas/new_mouse_dataset/space-ccfv2_d...
4,SLC03,1,rest,19,/home/govindas/new_mouse_dataset/space-ccfv2_d...
...,...,...,...,...,...
81,SLC09,3,rest,9,/home/govindas/new_mouse_dataset/space-ccfv2_d...
82,SLC10,3,rest,13,/home/govindas/new_mouse_dataset/space-ccfv2_d...
83,SLC10,3,rest,17,/home/govindas/new_mouse_dataset/space-ccfv2_d...
84,SLC10,3,rest,5,/home/govindas/new_mouse_dataset/space-ccfv2_d...


In [11]:
# Masker that reduces a 4D image to one signal per label in the parcels file.
# standardize=True z-scores the extracted signals; detrending (detrend=True,
# which would remove linear trends in time) is left disabled.
masker = NiftiLabelsMasker(labels_img=parcels_file, standardize=True)

In [12]:
def extract_time_series(row):
    """Extract the ROI time series of one run and write them to a text file.

    Parameters
    ----------
    row : pandas.Series
        One row of the metadata table. It must provide the entries 'sub',
        'ses', 'run' and 'task' (used to build the output file name) and
        'file' (the image handed to ``masker.fit_transform``).

    Returns
    -------
    None
        The result is saved under ``TS_path`` as
        ``sub-<sub>_ses-<ses>_run-<run>_task-<task>_desc-ts.txt`` with three
        decimal places per value.
    """
    entities = row[['sub', 'ses', 'run', 'task']]
    sub, ses, run, task = entities
    out_name = f'sub-{sub}_ses-{ses}_run-{run}_task-{task}' + '_desc-ts.txt'
    # Uses the module-level masker configured earlier in the notebook.
    roi_ts = masker.fit_transform(row['file'])
    np.savetxt(f'{TS_path}/{out_name}', roi_ts, fmt='%.3f')

In [13]:
# Queue one extraction task per row of the metadata table and run them on
# 10 joblib workers. extract_time_series returns None, so `results` is just a
# list of None values; the useful output is the set of files it writes.
run_tasks = (
    delayed(extract_time_series)(row)
    for _, row in data_df.iterrows()
)
results = Parallel(n_jobs=10)(run_tasks)