This notebook:
1. Computes searchlight-cluster-level features (mean intensity and leave-one-out pattern fidelity) for the GLMM model.

In [1]:
import numpy as np
import pandas as pd
import nibabel as nib
import statsmodels.formula.api as smf
import statsmodels.api as sm
from scipy.stats import zscore

In [2]:
# Load the recall matrix and the flattened encoding data from disk
final_recall = np.load('data/free_recall_mat_filtered.npy')         # [n_subs, n_pics]
final_data = np.load('data/full_encoding_data_flat_filtered.npy')   # [n_subs, n_pics, n_voxels]
n_subs, n_pics, n_voxels = final_data.shape

# Standardize every voxel within each subject, across pictures
X_norm = np.zeros_like(final_data)
for s, sub_dat in enumerate(final_data):
    # per-voxel mean / std over the picture axis of this subject's [n_pics, n_voxels] slice
    mean_vec = sub_dat.mean(axis=0)
    std_vec = sub_dat.std(axis=0)
    # constant voxels: divide by 1 instead of 0 so they come out as exactly 0
    std_vec[std_vec == 0] = 1.0
    X_norm[s] = (sub_dat - mean_vec) / std_vec
print(X_norm.shape) # (82, 69, 42699)

(82, 69, 42699)


In [5]:
# Load the mask of valid voxels and turn it into a boolean 3D volume.
mask_img = nib.load("atlas/mask_all_valid_voxels.nii.gz")
mask_data = mask_img.get_fdata()
mask_bool = mask_data.astype(bool)

# The ROI -> column-index mapping used later numbers the True voxels of this
# mask 0..n-1 and uses those numbers as column indices into the flattened
# data. That is only meaningful if the mask has exactly one True voxel per
# data column, so fail loudly here instead of extracting wrong voxels later.
if int(mask_bool.sum()) != final_data.shape[2]:
    raise ValueError(
        f"mask has {int(mask_bool.sum())} valid voxels but the flattened data "
        f"has {final_data.shape[2]} columns"
    )

In [None]:
# Read the two cluster-label volumes and collect the label values present in each.
# For both maps: *_img is the loaded NIfTI image, *_data the voxel array,
# and the bare name the sorted array of unique label values.
effort_cluster_label_img = nib.load("Final_Cluster_label_C1.nii.gz")
effort_cluster_label_data = effort_cluster_label_img.get_fdata()
effort_cluster_label = np.unique(effort_cluster_label_data)

# The image gets its own *_img name (mirroring the effort variables) so that
# `fidelity_cluster_label` only ever refers to the array of unique labels.
fidelity_cluster_label_img = nib.load("Searchlight_ISC_cluster_label_allthr.nii.gz")
fidelity_cluster_label_data = fidelity_cluster_label_img.get_fdata()
fidelity_cluster_label = np.unique(fidelity_cluster_label_data)

In [None]:
# convert between 3d coordinates and flat indices
def get_roi_indices_from_flat_data(roi_bool, mask_bool):
    """
    Turn a 3D ROI mask into column indices of the mask-flattened data.

    The True voxels of `mask_bool` are numbered 0..n_valid-1 in C (row-major)
    order; the function returns the numbers of those voxels that are also
    inside the ROI.
    NOTE(review): this matches the columns of `final_data` only if that array
    was flattened with the same mask in the same order -- confirm upstream.

    Parameters
    ----------
    roi_bool : array_like
        ROI volume; any non-zero value counts as "inside the ROI".
    mask_bool : array_like
        Valid-voxel volume with the same shape as `roi_bool`; any non-zero
        value counts as "valid".

    Returns
    -------
    numpy.ndarray
        1D integer array of flat indices, in increasing order. Empty when the
        ROI does not overlap the mask.

    Raises
    ------
    ValueError
        If `roi_bool` and `mask_bool` do not have the same shape.
    """
    # Coerce BOTH volumes to boolean. Without this, a 0/1 integer mask would be
    # interpreted as fancy-index coordinates (not as a mask) in the assignment
    # below and corrupt the mapping, and a float mask would raise.
    roi_bool = np.asarray(roi_bool).astype(bool)
    mask_bool = np.asarray(mask_bool).astype(bool)

    if roi_bool.shape != mask_bool.shape:
        raise ValueError(
            f"roi_bool shape {roi_bool.shape} does not match "
            f"mask_bool shape {mask_bool.shape}"
        )

    # Start with -1 everywhere; inside the mask, store the running index
    # 0, 1, 2, ... (i.e. the voxel's position among the valid voxels).
    flat_mapping = np.full(mask_bool.shape, -1, dtype=int)
    flat_mapping[mask_bool] = np.arange(np.count_nonzero(mask_bool))

    # Voxels that are both valid and inside the ROI. Because this is a subset
    # of the mask, every selected entry is >= 0 and no -1 filtering is needed.
    overlap_bool = mask_bool & roi_bool
    return flat_mapping[overlap_bool]

# test
# test_roi_idx = get_roi_indices_from_flat_data(effort_cluster_label_data == 1, mask_bool)

In [None]:
def calculate_fidelity_loo(roi_data, recall_mat, min_recallers=3):
    """
    Compute cluster-level pattern fidelity with a leave-one-out (LOO) template.

    For every picture, the template is the mean voxel pattern of the subjects
    who recalled that picture. When the subject being scored is a recaller,
    their own pattern is removed from the template first (leave-one-out).
    Fidelity is the Fisher z-transform of the Pearson correlation between the
    subject's pattern and the template, with r clipped to [-0.999, 0.999] so
    that arctanh stays finite.

    Parameters
    ----------
    roi_data : numpy.ndarray, shape (n_subs, n_pics, n_vox)
        Voxel patterns restricted to one ROI / cluster.
    recall_mat : array_like, shape (n_subs, n_pics)
        Entries equal to 1 mark "subject recalled this picture".
    min_recallers : int, default 3
        Pictures with fewer recallers than this are skipped entirely.

    Returns
    -------
    numpy.ndarray, shape (n_subs, n_pics)
        Fisher-z fidelity values. Entries stay NaN when the picture has too
        few recallers, when no subject is left to build the template, or when
        the subject pattern or the template has zero variance.

    Raises
    ------
    ValueError
        If `recall_mat` does not have shape (n_subs, n_pics).
    """
    n_subs, n_pics, n_vox = roi_data.shape
    recall_mat = np.asarray(recall_mat)
    if recall_mat.shape != (n_subs, n_pics):
        raise ValueError(
            f"recall_mat shape {recall_mat.shape} does not match "
            f"roi_data's (n_subs, n_pics) = {(n_subs, n_pics)}"
        )

    fidelity_mat = np.full((n_subs, n_pics), np.nan)

    # A correlation needs at least two voxels; with fewer, every entry is
    # undefined, so return the all-NaN matrix without triggering warnings.
    if n_vox < 2:
        return fidelity_mat

    for p in range(n_pics):
        is_recaller = recall_mat[:, p] == 1
        n_R = int(np.count_nonzero(is_recaller))

        if n_R < min_recallers:
            continue

        current_pic_data = roi_data[:, p, :]
        sum_recallers = np.sum(current_pic_data[is_recaller, :], axis=0)

        for s in range(n_subs):
            pat_sub = current_pic_data[s, :]
            if np.std(pat_sub) == 0:
                continue

            # Number of subjects contributing to this subject's template:
            # a recaller is left out of their own template.
            n_template = n_R - 1 if is_recaller[s] else n_R
            if n_template < 1:
                # No one left to average (e.g. sole recaller when
                # min_recallers <= 1): undefined, keep NaN instead of
                # dividing by zero.
                continue

            if is_recaller[s]:
                pat_template = (sum_recallers - pat_sub) / n_template
            else:
                pat_template = sum_recallers / n_template

            if np.std(pat_template) == 0:
                continue

            r = np.corrcoef(pat_sub, pat_template)[0, 1]
            # clip before arctanh so |r| == 1 does not map to +/-inf
            fidelity_mat[s, p] = np.arctanh(np.clip(r, -0.999, 0.999))

    return fidelity_mat


In [None]:
# The feature lists built below are indexed by POSITION, and later code reads
# them as features[cluster_id]. That is only correct when the labels of each
# map are exactly 0, 1, ..., K (0 = background). Verify it up front so that a
# label map with a gap or without background cannot silently shift clusters.
for _map_name, _labels in (("effort", effort_cluster_label),
                           ("fidelity", fidelity_cluster_label)):
    if not np.array_equal(_labels, np.arange(len(_labels))):
        raise ValueError(
            f"{_map_name} cluster labels must be contiguous 0..K, got {_labels}"
        )

# for intensity, just use the cluster activation mean
effort_flat_features = []
for cluster_id in effort_cluster_label:
    if cluster_id == 0:
        # background: placeholder so that list position == cluster id
        effort_flat_features.append([0])
        continue
    temp_mask = get_roi_indices_from_flat_data(effort_cluster_label_data==cluster_id, mask_bool)
    # average over the cluster's voxels -> [n_subs, n_pics]
    effort_flat_features.append(np.mean(X_norm[:, :, temp_mask], axis=2))
    # when use, just do: effort_flat_features[cluster_id]

# for fidelity, use LOO method to calc the pattern fidelity
fidelity_flat_features = []
for cluster_id in fidelity_cluster_label:
    if cluster_id == 0:
        # background: placeholder so that list position == cluster id
        fidelity_flat_features.append([0])
        continue
    temp_mask = get_roi_indices_from_flat_data(fidelity_cluster_label_data==cluster_id, mask_bool)
    # LOO pattern fidelity within the cluster -> [n_subs, n_pics], may contain NaN
    fidelity_flat_features.append(calculate_fidelity_loo(X_norm[:, :, temp_mask], final_recall))
    # when use, just do: fidelity_flat_features[cluster_id]

In [27]:
# Sanity check: number of entries in each feature list (position 0 is the background placeholder).
len(effort_flat_features), len(fidelity_flat_features)

(6, 5)

In [14]:
# Inspect the unique label values found in each cluster map.
effort_cluster_label, fidelity_cluster_label

(array([0., 1., 2., 3., 4., 5.]), array([0., 1., 2., 3., 4.]))

In [None]:
# Assemble the long-format feature table: one row per (subject, picture) pair.
# Take both dimensions from X_norm so this cell does not depend on leftover
# values of n_subs / n_pics from earlier cells.
n_subs, n_pics = X_norm.shape[:2]

# Position 0 of each feature list is the background placeholder, so the real
# clusters are 1..len-1. Deriving the ids from the lists keeps the columns in
# sync with the label maps instead of hard-coding how many clusters exist.
effort_ids = range(1, len(effort_flat_features))
fidelity_ids = range(1, len(fidelity_flat_features))

rows = []
for s in range(n_subs):
    for p in range(n_pics):

        val_recall = final_recall[s, p]
        row = {
            'Subject': str(s),
            'Item': str(p),
            'Recall': int(val_recall),
        }
        # Column order: all Intensity<k> columns first, then all Fidelity<k> columns
        row.update({f'Intensity{c}': effort_flat_features[c][s, p] for c in effort_ids})
        row.update({f'Fidelity{c}': fidelity_flat_features[c][s, p] for c in fidelity_ids})
        rows.append(row)

df = pd.DataFrame(rows)

In [33]:
# Write the feature table to CSV (to_csv's default also writes the row index as the first column).
df.to_csv('glmm_features.csv')

In [None]:
# Standardize every predictor across all rows and store it as <name>_Z.
preds = ['Intensity1', 'Fidelity1', 'Intensity2', 'Fidelity2', 
         'Intensity3', 'Fidelity3', 'Intensity4', 'Fidelity4',
         'Intensity5']
for col in preds:
    # The fidelity matrices are NaN wherever a value could not be computed
    # (too few recallers, zero-variance pattern). zscore's default
    # nan_policy='propagate' would then turn the WHOLE column into NaN;
    # 'omit' computes mean/std from the valid rows and leaves NaN rows as NaN.
    # For columns without NaN the result is unchanged.
    df[f'{col}_Z'] = zscore(df[col].to_numpy(dtype=float), nan_policy='omit')

In [37]:
# Write the table again, now including the *_Z columns, under a separate filename.
df.to_csv('glmm_features_z.csv')