This notebook is used to:
1. preprocess the fMRI data and the behavioral data
2. resample the atlas to the fMRI data grid
3. extract the data of each ROI and handle NaNs

In [None]:
import os

import h5py
import nibabel as nib
import numpy as np
import pandas as pd
from nilearn import datasets
from nilearn.image import resample_to_img

### 1. save fMRI and behavior data as NumPy (.npy) files

In [None]:
# save free recall matrix
# The sheet is read without a header row; its transpose is what gets stored.
free_rec = pd.read_excel(io='data/free recall performance.xls', header=None)
free_rec_np = free_rec.to_numpy()
submat = np.transpose(free_rec_np)
np.save(file='data/free_recall_mat.npy', arr=submat)

In [None]:
# save full encoding data
path = "data/full_encoding_data.mat"
# Open the file once; the context manager closes it even if reading fails.
with h5py.File(path, "r") as matdata:
    # general check
    print("keys at top level", list(matdata.keys()))
    ordmaps = matdata['ordmaps']
    # Pre-allocate a float32 buffer and let h5py fill it in place.
    n0 = np.zeros(ordmaps.shape, dtype='float32')
    ordmaps.read_direct(n0)
print(n0.shape) # n0.shape: 46, 80, 67, 100, 72
# Reverse the order of the first three axes; the last two stay in place.
n1 = np.transpose(n0, (2, 1, 0, 3, 4)) # change to: 67, 80, 46, 100, 72
# Merge the last two axes (100 * 72 = 7200) without hard-coding the sizes.
n2 = np.reshape(n1, n1.shape[:3] + (-1,))
print(n2.shape)

keys at top level ['ordmaps']
(46, 80, 67, 100, 72)
(67, 80, 46, 7200)


In [None]:
# Write both layouts to disk: the 5-D array first, then its 4-D reshaped version.
for npy_path, volume in (
    ('data/full_encoding_data_5d.npy', n1),
    ('data/full_encoding_data.npy', n2),
):
    np.save(npy_path, volume)

In [None]:
# load when required
# Reads back the 4-D array written to this path by the save cell.
beta = np.load(file='data/full_encoding_data.npy')

### 2. atlas resample to current beta dimensions

In [3]:
# 4x4 affine handed to nibabel when wrapping the beta volume:
# per-axis scale factors on the diagonal, translation in the last column.
# NOTE(review): the origin of these numbers is not recorded in the notebook — confirm.
affine = np.diag([-2.75, 2.75, 4., 1.])
affine[:3, 3] = [90.75, -126.5, -72.]

In [None]:
# cortical atlas
cort = datasets.fetch_atlas_harvard_oxford('cort-maxprob-thr25-2mm')
cortmap, cort_labels = cort.maps, cort.labels
print("Cortical labels:", len(cort_labels))
# List every label next to its position in the label list.
for idx, label_name in enumerate(cort_labels):
    print(idx, label_name)

# subcortical atlas
sub = datasets.fetch_atlas_harvard_oxford('sub-maxprob-thr25-2mm')
submap, sub_labels = sub.maps, sub.labels
print("Subcortical labels:", len(sub_labels))
for idx, label_name in enumerate(sub_labels):
    print(idx, label_name)

[fetch_atlas_harvard_oxford] Dataset found in C:\Users\ell\nilearn_data\fsl
Cortical labels: 49
0 Background
1 Frontal Pole
2 Insular Cortex
3 Superior Frontal Gyrus
4 Middle Frontal Gyrus
5 Inferior Frontal Gyrus, pars triangularis
6 Inferior Frontal Gyrus, pars opercularis
7 Precentral Gyrus
8 Temporal Pole
9 Superior Temporal Gyrus, anterior division
10 Superior Temporal Gyrus, posterior division
11 Middle Temporal Gyrus, anterior division
12 Middle Temporal Gyrus, posterior division
13 Middle Temporal Gyrus, temporooccipital part
14 Inferior Temporal Gyrus, anterior division
15 Inferior Temporal Gyrus, posterior division
16 Inferior Temporal Gyrus, temporooccipital part
17 Postcentral Gyrus
18 Superior Parietal Lobule
19 Supramarginal Gyrus, anterior division
20 Supramarginal Gyrus, posterior division
21 Angular Gyrus
22 Lateral Occipital Cortex, superior division
23 Lateral Occipital Cortex, inferior division
24 Intracalcarine Cortex
25 Frontal Medial Cortex
26 Juxtapositional Lob

In [None]:
# mask resample to current beta image space
# Wrap the first volume along the last axis of beta in a NIfTI image; it serves
# as the target grid for both atlases.
beta0 = beta[..., 0]
beta0img = nib.Nifti1Image(beta0, affine)
# Both atlases are resampled with nearest-neighbour interpolation, subcortical first.
submap_resample, cortmap_resample = (
    resample_to_img(atlas_img, beta0img, interpolation='nearest')
    for atlas_img in (submap, cortmap)
)

  submap_resample = resample_to_img(submap, beta0img, interpolation='nearest')
  submap_resample = resample_to_img(submap, beta0img, interpolation='nearest')
  cortmap_resample = resample_to_img(cortmap, beta0img, interpolation='nearest')
  cortmap_resample = resample_to_img(cortmap, beta0img, interpolation='nearest')


In [None]:
# check affine
# Print the affine of each resampled atlas (subcortical, then cortical).
for resampled_img in (submap_resample, cortmap_resample):
    print(resampled_img.affine)

[[  -2.75    0.      0.     90.75]
 [   0.      2.75    0.   -126.5 ]
 [   0.      0.      4.    -72.  ]
 [   0.      0.      0.      1.  ]]
[[  -2.75    0.      0.     90.75]
 [   0.      2.75    0.   -126.5 ]
 [   0.      0.      4.    -72.  ]
 [   0.      0.      0.      1.  ]]


In [17]:
# Create the output folder first so the saves also work on a fresh checkout
# (mirrors what the notebook already does for data/sub_cort and data/cort).
os.makedirs('atlas', exist_ok=True)
nib.save(submap_resample, 'atlas/subcortical_mask_resample.nii.gz')
nib.save(cortmap_resample, 'atlas/cortical_mask_resample.nii.gz')

In [7]:
# Pull the voxel arrays out of the resampled atlas images (subcortical, then cortical).
submap_data, cortmap_data = (
    atlas_img.get_fdata() for atlas_img in (submap_resample, cortmap_resample)
)
print("Subcortical mask shape:", submap_data.shape)
print("Cortical mask shape:", cortmap_data.shape)

Subcortical mask shape: (67, 80, 46)
Cortical mask shape: (67, 80, 46)


In [None]:
# calc voxel number in each ROI
# One record per label, subcortical first and cortical second; label index 0 is
# left out of both atlases.
summary_vxls = [
    {'id': roi_id, 'Name': roi_name, 'region': region, 'voxels': np.sum(atlas_array == roi_id)}
    for region, roi_names, atlas_array in (
        ("subcortical", sub_labels, submap_data),
        ("cortical", cort_labels, cortmap_data),
    )
    for roi_id, roi_name in enumerate(roi_names)
    if roi_id != 0
]
sum_vxl_df = pd.DataFrame(summary_vxls)
sum_vxl_df.to_csv('atlas/region_voxel_summary.csv', index=False)
sum_vxl_df

Unnamed: 0,id,Name,region,voxels
0,1,Left Cerebral White Matter,subcortical,8317
1,2,Left Cerebral Cortex,subcortical,17319
2,3,Left Lateral Ventricle,subcortical,303
3,4,Left Thalamus,subcortical,348
4,5,Left Caudate,subcortical,128
...,...,...,...,...
64,44,Planum Polare,cortical,198
65,45,Heschl's Gyrus (includes H1 and H2),cortical,154
66,46,Planum Temporale,cortical,248
67,47,Supracalcarine Cortex,cortical,76


### 3. extract fMRI in each ROI and save

In [None]:
def extract_roi_data(id, name, atlas_data, beta_data,
                     max_voxel_nan_ratio=0.5, max_row_nan_ratio=0.7):
    '''
    Extract the data of one ROI from a 4-D array and clean its NaNs.

    The result is a 2-D array whose rows run along the last axis of
    `beta_data` and whose columns are the voxels where `atlas_data == id`.
    Cleaning steps, in order:

    1. columns (voxels) whose NaN fraction exceeds `max_voxel_nan_ratio`
       are dropped;
    2. rows whose NaN fraction (over the kept voxels) exceeds
       `max_row_nan_ratio` are set entirely to NaN;
    3. remaining NaNs are replaced by the mean of the non-NaN values of
       the same row.

    NOTE(review): the original comments spoke of a 30% row limit while the
    code has always used 0.7; the default keeps 0.7 — confirm which one is
    intended. The original comments also call the rows "subject"/"trial".

    Parameters
    ----------
    id : scalar
        Label value to select in `atlas_data`.
    name : str
        ROI name, only used in warning messages.
    atlas_data : ndarray, 3-D
        Label volume; assumed to match the first three axes of `beta_data`.
    beta_data : ndarray, 4-D
        Data volume. It is not modified.
    max_voxel_nan_ratio : float, default 0.5
        Largest NaN fraction a voxel may have and still be kept.
    max_row_nan_ratio : float, default 0.7
        Largest NaN fraction a row may have before it is set to all-NaN.

    Returns
    -------
    cleaned_voxels : ndarray, shape (beta_data.shape[-1], n_kept_voxels)
    left_ratio : float
        Fraction of ROI voxels kept; NaN when the ROI has no voxels.
    all_nan_subjects : ndarray of int
        Indices of rows that are entirely NaN after cleaning. When no voxel
        is kept, every row index is returned.
    '''
    roi_coords = np.where(atlas_data == id)
    if len(roi_coords[0]) == 0:
        print(f"Warning: ROI {name} has no voxels!")

    # get roi voxels: [row, voxel]. Indexing with coordinate arrays copies the
    # data, so the in-place edits below never touch beta_data.
    roi_voxels = beta_data[roi_coords[0], roi_coords[1], roi_coords[2], :].T
    n_roi_voxels = roi_voxels.shape[1]

    # vertical cleaning: drop voxels whose NaN ratio is above the limit
    nan_ratio_per_voxel = np.isnan(roi_voxels).mean(axis=0)
    valid_voxel_mask = nan_ratio_per_voxel <= max_voxel_nan_ratio
    # Avoid a 0/0 division (and its RuntimeWarning) for an empty ROI.
    left_ratio = valid_voxel_mask.sum() / n_roi_voxels if n_roi_voxels else np.nan
    cleaned_voxels = roi_voxels[:, valid_voxel_mask]
    if cleaned_voxels.shape[1] == 0:
        print(f"WARNING: {name}: all voxels removed due to high NaN ratio!")

    # horizontal cleaning: rows with too many NaNs are blanked out completely
    nan_mask = np.isnan(cleaned_voxels)
    if cleaned_voxels.shape[1] > 0:
        invalid_row_mask = nan_mask.mean(axis=1) > max_row_nan_ratio
        cleaned_voxels[invalid_row_mask, :] = np.nan
        nan_mask[invalid_row_mask, :] = True

    # Fill only rows that mix NaN and valid values; skipping all-NaN rows keeps
    # np.nanmean from emitting "Mean of empty slice" warnings.
    all_nan_rows = nan_mask.all(axis=1)
    partial_rows = nan_mask.any(axis=1) & ~all_nan_rows
    if partial_rows.any():
        row_means = np.nanmean(cleaned_voxels[partial_rows], axis=1)
        fill_values = np.zeros(cleaned_voxels.shape[0], dtype=row_means.dtype)
        fill_values[partial_rows] = row_means
        fill_mask = nan_mask & partial_rows[:, None]
        # Boolean assignment and np.where both walk the mask in row-major order.
        cleaned_voxels[fill_mask] = fill_values[np.where(fill_mask)[0]]

    all_nan_subjects = np.where(all_nan_rows)[0]

    return cleaned_voxels, left_ratio, all_nan_subjects

In [None]:
# Substrings that mark subcortical labels to leave out.
blacklist_keywords = ["White Matter", "Ventricle", "Cerebral Cortex"]
# Preview which labels match; one line is printed per matching keyword.
for i, name in enumerate(sub.labels):
    for keyword in (kw for kw in blacklist_keywords if kw in name):
        print(f"Skipping ROI {i} - {name} due to blacklist keyword: {keyword}")

Skipping ROI 1 - Left Cerebral White Matter due to blacklist keyword: White Matter
Skipping ROI 2 - Left Cerebral Cortex due to blacklist keyword: Cerebral Cortex
Skipping ROI 3 - Left Lateral Ventricle due to blacklist keyword: Ventricle
Skipping ROI 12 - Right Cerebral White Matter due to blacklist keyword: White Matter
Skipping ROI 13 - Right Cerebral Cortex due to blacklist keyword: Cerebral Cortex
Skipping ROI 14 - Right Lateral Ventricle due to blacklist keyword: Ventricle


In [18]:
# Output folders for the per-ROI arrays (`os` comes from the imports cell at the top).
save_dir_subcort = "data/sub_cort"
save_dir_cort   = "data/cort"
os.makedirs(save_dir_subcort, exist_ok=True)
os.makedirs(save_dir_cort, exist_ok=True)

#### subcortical roi voxel save

In [30]:
# Extract, clean and save every subcortical ROI, collecting one summary record each.
sub_sum = []
for i, name in enumerate(sub.labels):
    # Skip label 0 and every label matching blacklist_keywords. For this atlas
    # that is ids 0, 1, 2, 3, 12, 13, 14 — the same set that was hard-coded before.
    if i == 0 or any(keyword in name for keyword in blacklist_keywords):
        continue
    roi_features, ratio, allnan_idx = extract_roi_data(i, name, submap_data, beta)
    np.save(os.path.join(save_dir_subcort, f"beta_{i}_{name}.npy"), roi_features)
    print(f"{name} saved in shape: {roi_features.shape}")
    sub_sum.append({'id':i, 'Name': name, 'region':"subcortical", 'ratio':ratio, 'voxels': roi_features.shape[1], 'allnan_idx': allnan_idx})


Left Thalamus saved in shape: (7200, 348)
Left Caudate saved in shape: (7200, 128)
Left Putamen saved in shape: (7200, 223)
Left Pallidum saved in shape: (7200, 66)
Brain-Stem saved in shape: (7200, 1168)
Left Hippocampus saved in shape: (7200, 183)
Left Amygdala saved in shape: (7200, 83)
Left Accumbens saved in shape: (7200, 27)
Right Thalamus saved in shape: (7200, 340)
Right Caudate saved in shape: (7200, 137)
Right Putamen saved in shape: (7200, 197)
Right Pallidum saved in shape: (7200, 67)
Right Hippocampus saved in shape: (7200, 186)
Right Amygdala saved in shape: (7200, 104)
Right Accumbens saved in shape: (7200, 22)


  subject_means = np.nanmean(cleaned_voxels, axis=1)


In [38]:
# Summary table of the subcortical ROIs; allnan_sum counts the entries of
# allnan_idx for each ROI.
subsum_df = pd.DataFrame(sub_sum).assign(
    allnan_sum=lambda frame: frame["allnan_idx"].map(len)
)
subsum_df.to_csv('data/subcortical_region_feature_summary.csv', index=False)
subsum_df

Unnamed: 0,id,Name,region,ratio,voxels,allnan_idx,allnan_sum
0,4,Left Thalamus,subcortical,1.0,348,[],0
1,5,Left Caudate,subcortical,1.0,128,[],0
2,6,Left Putamen,subcortical,1.0,223,[],0
3,7,Left Pallidum,subcortical,1.0,66,[],0
4,8,Brain-Stem,subcortical,0.916078,1168,[],0
5,9,Left Hippocampus,subcortical,1.0,183,[],0
6,10,Left Amygdala,subcortical,1.0,83,[],0
7,11,Left Accumbens,subcortical,1.0,27,"[864, 865, 866, 867, 868, 869, 870, 871, 872, ...",216
8,15,Right Thalamus,subcortical,1.0,340,[],0
9,16,Right Caudate,subcortical,1.0,137,[],0


#### cortical roi voxel save

In [32]:
# Extract, clean and save every cortical ROI; label index 0 is left out.
cort_sum = []
for i, name in list(enumerate(cort.labels))[1:]:
    roi_features, ratio, allnan_idx = extract_roi_data(i, name, cortmap_data, beta)
    roi_file = os.path.join(save_dir_cort, f"beta_{i}_{name}.npy")
    np.save(roi_file, roi_features)
    print(f"{name} saved in shape: {roi_features.shape}")
    cort_sum.append(dict(
        id=i,
        Name=name,
        region="cortical",
        ratio=ratio,
        voxels=roi_features.shape[1],
        allnan_idx=allnan_idx,
    ))


Frontal Pole saved in shape: (7200, 3382)
Insular Cortex saved in shape: (7200, 624)
Superior Frontal Gyrus saved in shape: (7200, 1333)
Middle Frontal Gyrus saved in shape: (7200, 1332)
Inferior Frontal Gyrus, pars triangularis saved in shape: (7200, 286)
Inferior Frontal Gyrus, pars opercularis saved in shape: (7200, 367)
Precentral Gyrus saved in shape: (7200, 2215)
Temporal Pole saved in shape: (7200, 783)
Superior Temporal Gyrus, anterior division saved in shape: (7200, 142)
Superior Temporal Gyrus, posterior division saved in shape: (7200, 433)
Middle Temporal Gyrus, anterior division saved in shape: (7200, 211)
Middle Temporal Gyrus, posterior division saved in shape: (7200, 482)
Middle Temporal Gyrus, temporooccipital part saved in shape: (7200, 488)
Inferior Temporal Gyrus, anterior division saved in shape: (7200, 108)
Inferior Temporal Gyrus, posterior division saved in shape: (7200, 68)
Inferior Temporal Gyrus, temporooccipital part saved in shape: (7200, 250)
Postcentral Gy

  subject_means = np.nanmean(cleaned_voxels, axis=1)


Superior Parietal Lobule saved in shape: (7200, 732)
Supramarginal Gyrus, anterior division saved in shape: (7200, 445)
Supramarginal Gyrus, posterior division saved in shape: (7200, 598)
Angular Gyrus saved in shape: (7200, 623)
Lateral Occipital Cortex, superior division saved in shape: (7200, 2258)
Lateral Occipital Cortex, inferior division saved in shape: (7200, 990)
Intracalcarine Cortex saved in shape: (7200, 376)
Frontal Medial Cortex saved in shape: (7200, 131)
Juxtapositional Lobule Cortex (formerly Supplementary Motor Cortex) saved in shape: (7200, 396)
Subcallosal Cortex saved in shape: (7200, 80)
Paracingulate Gyrus saved in shape: (7200, 751)
Cingulate Gyrus, anterior division saved in shape: (7200, 677)
Cingulate Gyrus, posterior division saved in shape: (7200, 621)
Precuneous Cortex saved in shape: (7200, 1462)
Cuneal Cortex saved in shape: (7200, 323)
Frontal Orbital Cortex saved in shape: (7200, 585)
Parahippocampal Gyrus, anterior division saved in shape: (7200, 209)

In [40]:
# Summary table of the cortical ROIs; allnan_sum counts the entries of
# allnan_idx for each ROI.
cortsum_df = pd.DataFrame(cort_sum).assign(
    allnan_sum=lambda frame: frame["allnan_idx"].map(len)
)
cortsum_df.to_csv('data/cortical_region_feature_summary.csv', index=False)
cortsum_df

Unnamed: 0,id,Name,region,ratio,voxels,allnan_idx,allnan_sum
0,1,Frontal Pole,cortical,0.811226,3382,[],0
1,2,Insular Cortex,cortical,1.0,624,[],0
2,3,Superior Frontal Gyrus,cortical,0.992554,1333,[],0
3,4,Middle Frontal Gyrus,cortical,0.950071,1332,[],0
4,5,"Inferior Frontal Gyrus, pars triangularis",cortical,1.0,286,[],0
5,6,"Inferior Frontal Gyrus, pars opercularis",cortical,1.0,367,[],0
6,7,Precentral Gyrus,cortical,0.975341,2215,[],0
7,8,Temporal Pole,cortical,0.633495,783,[],0
8,9,"Superior Temporal Gyrus, anterior division",cortical,0.993007,142,[],0
9,10,"Superior Temporal Gyrus, posterior division",cortical,0.902083,433,[],0


In [None]:
# usage example
behav_data = pd.read_csv(f"../data/processed/valence_order.csv")
label = behav_data['valence'].to_numpy()
beta = n1[:,:,:,behav_data['order'].to_numpy()]

In [9]:
# Sanity check: show the shape of the selected data, then of the label vector.
for checked in (beta, label):
    print(checked.shape)

(67, 80, 46, 7112)
(7112,)
