In [None]:
#claude trying to return to my functioning history with the error of only two runs and focusing on one subject hard coded
#searchlight MVPD

import warnings
warnings.filterwarnings("ignore")
import resource
import sys
import time
import os
import gc
import pandas as pd
import numpy as np
import pdb

from sklearn.decomposition import PCA
from sklearn.model_selection import ShuffleSplit
from sklearn.linear_model import LinearRegression

from nilearn import image, datasets
import nibabel as nib
from brainiak.searchlight.searchlight import Searchlight, Ball

def transform_mask_to_native(subject_func, standard_mask, output_dir):
    """
    Resample a standard-space mask onto a subject's functional image grid.

    The mask is resampled with nearest-neighbour interpolation and written
    to ``<output_dir>/whole_brain_mask_native.nii.gz``; ``output_dir`` is
    created if it does not exist.

    Parameters
    ----------
    subject_func : path or image accepted by ``image.load_img``
        Functional image that defines the target grid.
    standard_mask : path or image accepted by ``image.load_img``
        Mask to be resampled.
    output_dir : str
        Directory that receives the resampled mask.

    Returns
    -------
    str
        Path of the written mask file.
    """
    os.makedirs(output_dir, exist_ok=True)

    reference_img = image.load_img(subject_func)
    source_mask_img = image.load_img(standard_mask)

    # Nearest-neighbour keeps the resampled mask values unblended.
    resampled_mask = image.resample_to_img(
        source_mask_img,
        reference_img,
        interpolation='nearest',
    )

    destination = os.path.join(output_dir, 'whole_brain_mask_native.nii.gz')
    resampled_mask.to_filename(destination)
    return destination

# Import your parameters
# The repository root is put first on sys.path so the project-local
# `ptoc_params` module can be imported.
curr_dir = f'/user_data/csimmon2/git_repos/ptoc'
sys.path.insert(0, curr_dir)
import ptoc_params as params

print('libraries loaded...')

# Load subject information
# Only rows whose `group` column equals 'control' are kept. Note that `subs`
# below is hard-coded and does not use this table.
sub_info = pd.read_csv(f'{curr_dir}/sub_info.csv')
sub_info = sub_info[sub_info['group'] == 'control']
subs = ['sub-025']  # Hard-coded to a single subject for testing
dorsal = ['pIPS'] # Run for one ROI initially

print(subs, dorsal)

# Set up directories and parameters
study = 'ptoc'
study_dir = f"/lab_data/behrmannlab/vlad/{study}"
localizer = 'Object'  # scramble or object. This is the localizer task.
results_dir = '/user_data/csimmon2/git_repos/ptoc/results'
raw_dir = "/lab_data/behrmannlab/vlad/hemispace"
exp = 'loc' 

# Subject-specific paths; the subject (sub-025) is hard-coded here.
out_dir = f'{study_dir}/derivatives/fc'
sub_dir = f'{study_dir}/sub-025/ses-01/'
cov_dir = f'{raw_dir}/covs'
roi_dir = f'{sub_dir}/derivatives/rois'
exp_dir = f'{sub_dir}/derivatives/fsl/{exp}'

# Run numbers to analyse: [1, 2].
runs = list(range(1,3))

# Resample the standard-space mask onto the grid of run 1's functional image
# and load the result; this mask restricts both cleaning and the searchlight.
standard_mask_path = '/user_data/csimmon2/git_repos/ptoc/roiParcels/mruczek_parcels/binary/all_visual_areas.nii.gz'
native_mask_path = transform_mask_to_native(
    f'{exp_dir}/run-01/1stLevel.feat/filtered_func_data_reg.nii.gz',
    standard_mask_path,
    f'{sub_dir}/derivatives/masks'
)
whole_brain_mask = image.load_img(native_mask_path)

# The mask's affine is reused when the searchlight result is saved.
affine = whole_brain_mask.affine
dimsize = whole_brain_mask.header.get_zooms()  # zooms reported by the mask header

# scan parameters
# `vols` and `first_fix` are read by create_ts_mask: each run is assumed to
# contribute (vols - first_fix) consecutive volumes to the concatenated data.
vols = 184
first_fix = 0

# threshold for PCA
# Cumulative explained-variance ratio passed to calc_pc_n.
pc_thresh = .9

# Shared regression model (used by calc_mvc) and run-level splitter (used by mvpd).
clf = LinearRegression()
rs = ShuffleSplit(n_splits=5, test_size=1/3, random_state=0)

print('Searchlight setup ...')
mask = image.get_data(whole_brain_mask) #the mask to search within

sl_rad = 2 #radius of searchlight sphere (in voxels)
max_blk_edge = 10 #how many blocks to send on each parallelized search
pool_size = 1 #number of cores to work on each search

# NOTE(review): voxels_proportion is not referenced elsewhere in the visible code.
voxels_proportion=1
shape = Ball

def extract_pc(data, n_components=None):
    """Fit a PCA on ``data`` and return the fitted estimator.

    ``n_components`` is forwarded unchanged to ``PCA``.
    """
    return PCA(n_components=n_components).fit(data)

def calc_pc_n(pca, thresh):
    """Return how many leading components reach a cumulative variance ratio.

    Parameters
    ----------
    pca : object
        Anything exposing ``explained_variance_ratio_`` as a sequence of
        per-component ratios, in component order.
    thresh : float
        Cumulative explained-variance ratio to reach.

    Returns
    -------
    int
        The smallest count of leading components whose summed ratio is
        ``>= thresh``; if the threshold is never reached, the total number
        of components.

    Raises
    ------
    ValueError
        If ``explained_variance_ratio_`` is empty (previously this surfaced
        as an ``UnboundLocalError`` on the return statement).
    """
    ratios = pca.explained_variance_ratio_
    if len(ratios) == 0:
        raise ValueError("pca.explained_variance_ratio_ is empty; cannot choose a component count.")

    cumulative = 0
    for n_used, ratio in enumerate(ratios, start=1):
        cumulative += ratio
        if cumulative >= thresh:
            return n_used
    # Threshold never reached: keep every component.
    return len(ratios)

def calc_mvc(seed_train,seed_test, target_train, target_test, target_pc):
    """Variance-weighted prediction accuracy of target components from seed data.

    For every component of ``target_pc`` the module-level ``clf`` is fitted
    on ``seed_train`` against that column of ``target_train``, used to
    predict from ``seed_test``, and the prediction is correlated with the
    same column of ``target_test``. Each correlation is weighted by the
    component's explained-variance ratio; the weighted sum is divided by the
    sum of the ratios.

    Returns
    -------
    float
        The weighted mean correlation across target components.
    """
    variance_weights = target_pc.explained_variance_ratio_
    weighted_scores = []
    for comp_idx in range(len(variance_weights)):
        clf.fit(seed_train, target_train[:, comp_idx])
        predicted = clf.predict(seed_test)
        score = np.corrcoef(predicted, target_test[:, comp_idx])[0, 1]
        weighted_scores.append(score * variance_weights[comp_idx])
    return np.sum(weighted_scores) / np.sum(variance_weights)

def create_ts_mask(train, test):
    """Expand run numbers into volume indices of the concatenated series.

    Run ``r`` maps to the ``vols - first_fix`` consecutive indices starting
    at ``(r - 1) * (vols - first_fix)`` (``vols`` and ``first_fix`` are
    module-level settings), so run numbers are treated as 1-based.

    Parameters
    ----------
    train, test : iterable of int
        Run numbers for the training and test partitions.

    Returns
    -------
    tuple of (list of int, list of int)
        Volume indices for the training runs and for the test runs, in the
        order the runs were given.
    """
    def _volumes_for(run_numbers):
        indices = []
        for run_number in run_numbers:
            start = (run_number - 1) * (vols - first_fix)
            indices.extend(range(start, start + (vols - first_fix)))
        return indices

    train_index = _volumes_for(train)
    test_index = _volumes_for(test)
    return train_index, test_index

def mvpd(data, sl_mask, myrad, seed_ts):
    """Searchlight kernel: predict local multivoxel patterns from the seed.

    For each train/test split of the runs, the seed and the local target
    data are each reduced with PCA (keeping enough components to reach
    ``pc_thresh`` explained variance on the training volumes), and
    ``calc_mvc`` scores how well the seed components predict the target
    components on the held-out volumes.

    Parameters
    ----------
    data : list
        ``data[0]`` is a 4D array whose last axis is time; it is flattened
        to a (time, voxels) matrix.
    sl_mask, myrad :
        Accepted to match the kernel signature; not used here.
        NOTE(review): because ``sl_mask`` is ignored, every voxel of the
        passed block is used, not only those inside the searchlight shape —
        confirm this is intended.
    seed_ts : numpy.ndarray
        (time, features) seed time series, indexed by the same volume
        indices as the target data.

    Returns
    -------
    float
        Mean ``calc_mvc`` score across splits.
    """
    data4D = data[0]
    data4D = np.transpose(data4D.reshape(-1, data[0].shape[3]))
    mvc_list = []
    for train_pos, test_pos in rs.split(runs):
        # ShuffleSplit yields 0-based positions into `runs`, whereas
        # create_ts_mask expects the run numbers themselves. Passing the
        # positions directly produced negative volume indices for position 0.
        train_runs = [runs[pos] for pos in train_pos]
        test_runs = [runs[pos] for pos in test_pos]
        train_index, test_index = create_ts_mask(train_runs, test_runs)

        seed_train = seed_ts[train_index,:]
        seed_test = seed_ts[test_index,:]
        target_train = data4D[train_index, :]
        target_test = data4D[test_index, :]

        # Seed: choose the component count on training data, then project both sets.
        n_comp = calc_pc_n(extract_pc(seed_train),pc_thresh)
        seed_pca = extract_pc(seed_train, n_comp)
        seed_train_pcs = seed_pca.transform(seed_train)
        seed_test_pcs = seed_pca.transform(seed_test)

        # Target: same procedure on the local searchlight data.
        n_comp = calc_pc_n(extract_pc(target_train),pc_thresh)
        target_pca = extract_pc(target_train, n_comp)
        target_train_pcs = target_pca.transform(target_train)
        target_test_pcs = target_pca.transform(target_test)

        mvc_list.append(calc_mvc(seed_train_pcs, seed_test_pcs, target_train_pcs, target_test_pcs, target_pca))
    return np.mean(mvc_list)

def load_data():
    """Load, clean and concatenate every run in ``runs`` along the time axis.

    Each run is loaded from the hard-coded sub-025 path under ``raw_dir``,
    cleaned with ``image.clean_img`` (standardized, restricted to
    ``whole_brain_mask``) and converted to an array; the arrays are then
    concatenated on axis 3.

    Returns
    -------
    numpy.ndarray
        4D array with all runs concatenated on the last axis.

    Raises
    ------
    ValueError
        If no run could be loaded, or if only some runs could be loaded.
        A partial load is rejected because ``create_ts_mask`` derives volume
        offsets from run numbers, so a missing run would silently shift
        every later run's volumes.
    """
    def _peak_rss_gb():
        # ru_maxrss is reported in kilobytes on Linux and in bytes on macOS.
        scale = 1024 ** 3 if sys.platform == 'darwin' else 1024 ** 2
        return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / scale

    print('Loading data...')
    all_runs = []
    failed_runs = []
    for run in runs:
        print(f"Loading run {run}")
        try:
            curr_run = image.load_img(f"{raw_dir}/sub-025/ses-01/derivatives/fsl/loc/run-0{run}/1stLevel.feat/filtered_func_data_reg.nii.gz")
            curr_run = image.get_data(image.clean_img(curr_run, standardize=True, mask_img=whole_brain_mask))
            print(f"Run {run} shape: {curr_run.shape}")
            all_runs.append(curr_run)
        except Exception as e:
            print(f"Error loading run {run}: {str(e)}")
            failed_runs.append(run)
        # The previous label said MB, but the value is in GB.
        print(f"Memory usage after run {run}: {_peak_rss_gb()} GB")
    print('Data loaded. Concatenating...')
    if not all_runs:
        raise ValueError("No valid run data was loaded. Check your input files and paths.")
    if failed_runs:
        raise ValueError(f"Could not load run(s) {failed_runs}; refusing to concatenate a partial set of runs.")
    bold_vol = np.concatenate(all_runs, axis=3)  # Compile into 4D
    del all_runs
    print(f"Concatenated data shape: {bold_vol.shape}")
    print(f"Final memory usage: {_peak_rss_gb()} GB")
    print('Data concatenated...')
    gc.collect()
    return bold_vol

def extract_seed_ts(bold_vol, sub, roi, hemisphere, task='loc', radius=6):
    """Extract the (time, voxels) time series of a seed ROI from ``bold_vol``.

    The ROI image is loaded from
    ``{study_dir}/{sub}/ses-01/derivatives/rois/spheres_nifti/`` using the
    ``{sub}_{roi}_{hemisphere}_{task}_sphere_r{radius}mm.nii.gz`` naming
    scheme. If its spatial shape differs from ``bold_vol`` it is resampled
    (nearest neighbour) onto the BOLD grid using the module-level ``affine``.

    Parameters
    ----------
    bold_vol : numpy.ndarray
        4D array with time on the last axis.
    sub, roi, hemisphere, task : str
        Components of the ROI file name.
    radius : int
        Sphere radius used in the ROI file name.

    Returns
    -------
    numpy.ndarray
        (time, voxels) array holding ROI-weighted time series of the voxels
        inside the ROI; voxels whose series is zero at every time point are
        dropped.

    Raises
    ------
    Exception
        Any error raised while loading or processing is printed and re-raised.
    """
    print("Extracting seed time series...")
    seed_roi_path = f'{study_dir}/{sub}/ses-01/derivatives/rois/spheres_nifti/{sub}_{roi}_{hemisphere}_{task}_sphere_r{radius}mm.nii.gz'
    print(f"Loading seed ROI from: {seed_roi_path}")
    try:
        seed_roi_img = image.load_img(seed_roi_path)
        seed_roi = image.get_data(seed_roi_img)
        print(f"Loaded seed ROI shape: {seed_roi.shape}")
        print(f"Bold volume shape: {bold_vol.shape}")
        if seed_roi.shape[:3] != bold_vol.shape[:3]:
            print("Warning: Seed ROI shape does not match bold volume shape. Attempting to reshape...")
            seed_roi = image.resample_to_img(seed_roi_img, nib.Nifti1Image(bold_vol, affine), interpolation='nearest').get_fdata()
            print(f"Reshaped seed ROI to: {seed_roi.shape}")

        # A trailing singleton axis carries no information; treat it as 3D.
        if seed_roi.ndim == 4 and seed_roi.shape[3] == 1:
            seed_roi = seed_roi[..., 0]

        if seed_roi.ndim == 3:
            # Index only the ROI voxels instead of multiplying the whole 4D
            # volume by the ROI, which allocated a full-size copy of the data.
            # Boolean indexing walks voxels in the same C order as the former
            # reshape, so the surviving rows and their order are unchanged.
            in_roi = seed_roi != 0
            seed_ts = bold_vol[in_roi] * seed_roi[in_roi][:, np.newaxis]
        else:
            # Unusual ROI dimensionality: fall back to plain broadcasting.
            seed_ts = (seed_roi * bold_vol).reshape(-1, bold_vol.shape[3])

        seed_ts = seed_ts[~np.all(seed_ts == 0, axis=1)]
        seed_ts = np.transpose(seed_ts)
        print(f"Extracted seed time series shape: {seed_ts.shape}")
        print('Seed data extracted successfully.')
        return seed_ts
    except Exception as e:
        print(f"Error in extract_seed_ts: {str(e)}")
        raise

# Load the concatenated BOLD data and the seed ROI time series.
bold_vol = load_data()
print(f"bold_vol type: {type(bold_vol)}")
print(f"bold_vol shape: {bold_vol.shape}")
print(f"bold_vol dtype: {bold_vol.dtype}")
seed_ts = extract_seed_ts(bold_vol, sub='sub-025', roi='pIPS', hemisphere='left')

# Run the searchlight; the printed numbers are peak resident memory
# (ru_maxrss scaled by 1024 twice) at each stage.
t1 = time.time()
# The inner print() call was removed: it printed the memory value on its own
# line and made the outer print show "Begin Searchlight None".
print("Begin Searchlight", (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024)/1024)
sl = Searchlight(sl_rad=sl_rad,max_blk_edge=max_blk_edge, shape = shape)
print('Distribute', (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024)/1024)
sl.distribute([bold_vol], mask)

print('Broadcast', (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024)/1024)
sl.broadcast(seed_ts)
print('Run', (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024)/1024, flush= True)
sl_result = sl.run_searchlight(mvpd, pool_size=pool_size)
print("End Searchlight\n", (time.time()-t1)/60)

# Convert to float, zero out NaNs and write the result map.
sl_result = sl_result.astype('double')
sl_result[np.isnan(sl_result)] = 0
sl_nii = nib.Nifti1Image(sl_result, affine)
# Make sure the destination exists so a long run is not lost at save time.
os.makedirs(out_dir, exist_ok=True)
nib.save(sl_nii, f'{out_dir}/{study}_sub-025_pIPS_left_mvpd.nii.gz')

libraries loaded...
['sub-025'] ['pIPS']
Searchlight setup ...
Loading data...
Loading run 1
Run 1 shape: (176, 256, 256, 184)
Memory usage after run 1: 20.0933837890625 MB
Loading run 2
Run 2 shape: (176, 256, 256, 184)
Memory usage after run 2: 21.25104522705078 MB
Data loaded. Concatenating...
Concatenated data shape: (176, 256, 256, 368)
Final memory usage: 21.25104522705078 MB
Data concatenated...
bold_vol type: <class 'numpy.ndarray'>
bold_vol shape: (176, 256, 256, 368)
bold_vol dtype: float32
Extracting seed time series...
Loading seed ROI from: /lab_data/behrmannlab/vlad/ptoc/sub-025/ses-01/derivatives/rois/spheres_nifti/sub-025_pIPS_left_loc_sphere_r6mm.nii.gz
Loaded seed ROI shape: (176, 256, 256)
Bold volume shape: (176, 256, 256, 368)
Extracted seed time series shape: (368, 114)
Seed data extracted successfully.
51.74021530151367
Begin Searchlight None
Distribute 51.74021530151367
Broadcast 51.74021530151367
Run 51.74021530151367


In [None]:
graph TD
    A[Attention System] --> B[Top-Down Processes]
    A --> C[Bottom-Up Processes]
    B --> D[Executive Function]
    B --> E[Goals]
    B --> F[Conscious Allocation]
    C --> G[Stimulus Salience]
    C --> H[Environmental Factors]
    I[Task Difficulty] --> J[Resource Allocation]
    K[Individual Differences] --> J
    L[Context] --> J
    J --> M[Attentional Focus]
    D --> M
    E --> M
    F --> M
    G --> M
    H --> M

In [3]:
##keep for now I feel like it is close to working
import os
import pandas as pd
import numpy as np
from nilearn import image, input_data
from nilearn.glm.first_level import compute_regressor
from statsmodels.tsa.stattools import grangercausalitytests
import sys
import nibabel as nib
import logging
from nilearn.image import new_img_like
from nilearn.masking import apply_mask, unmask
from sklearn.model_selection import KFold


# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Import your parameters
# The repository root is put first on sys.path so the project-local
# `ptoc_params` module can be imported.
curr_dir = f'/user_data/csimmon2/git_repos/ptoc'
sys.path.insert(0, curr_dir)
import ptoc_params as params

# Set up directories and parameters
study = 'ptoc'
study_dir = f"/lab_data/behrmannlab/vlad/{study}"
localizer = 'Object'  # scramble or object. This is the localizer task.
results_dir = '/user_data/csimmon2/git_repos/ptoc/results'
raw_dir = params.raw_dir

# Load subject information
# Only rows whose `group` column equals 'control' are kept; their `sub`
# values become the list of subjects to process.
sub_info = pd.read_csv(f'{curr_dir}/sub_info.csv')
sub_info = sub_info[sub_info['group'] == 'control']
subs = sub_info['sub'].tolist()
# subs = ['sub-068']  # Uncomment for testing

rois = ['pIPS', 'LO']
hemispheres = ['left', 'right']
run_num = 3
runs = list(range(1, run_num + 1))
# Every unordered pair of distinct runs, e.g. [[1, 2], [1, 3], [2, 3]] for run_num = 3.
run_combos = [[rn1, rn2] for rn1 in range(1, run_num + 1) for rn2 in range(rn1 + 1, run_num + 1)]

def create_searchlight_sphere(center, radius, mask):
    """Build a binary sphere on the grid of ``mask``.

    Parameters
    ----------
    center : sequence of 3 numbers
        Sphere centre in array-index coordinates.
    radius : number
        Sphere radius in array-index units (voxels).
    mask : numpy.ndarray
        Only its shape is used, to size the output.

    Returns
    -------
    numpy.ndarray
        Float array of ``mask.shape`` holding 1 where the Euclidean distance
        to ``center`` is at most ``radius`` and 0 elsewhere.
    """
    center_i, center_j, center_k = center
    grid_i, grid_j, grid_k = np.ogrid[:mask.shape[0], :mask.shape[1], :mask.shape[2]]

    # Open grids broadcast to the full volume when combined.
    offset_i = grid_i - center_i
    offset_j = grid_j - center_j
    offset_k = grid_k - center_k
    distance = np.sqrt(offset_i**2 + offset_j**2 + offset_k**2)

    inside = distance <= radius
    volume = np.zeros(mask.shape)
    volume[inside] = 1
    return volume

def extract_searchlight_timeseries(img, sphere_mask):
    """Return the mean time series of ``img`` inside ``sphere_mask``.

    Parameters
    ----------
    img : Niimg-like
        4D image to sample.
    sphere_mask : numpy.ndarray or Niimg-like
        Sphere mask. A NumPy array (such as the output of
        ``create_searchlight_sphere``) is treated as non-zero = inside and
        wrapped into an image on the grid of ``img``, because ``apply_mask``
        needs an image rather than a bare array. The array is assumed to
        have the same spatial shape as ``img``.

    Returns
    -------
    numpy.ndarray
        Column vector of shape (timepoints, 1) holding the mean across the
        masked voxels at each time point.
    """
    if isinstance(sphere_mask, np.ndarray):
        sphere_mask = new_img_like(img, sphere_mask.astype(bool))
    masked_data = apply_mask(img, sphere_mask)
    return np.mean(masked_data, axis=1).reshape(-1, 1)

def extract_roi_sphere(img, coords):
    """Extract the time series of a radius-6 sphere seeded at ``coords``.

    A ``NiftiSpheresMasker`` with a single seed is fitted on ``img``; its
    output is averaged across columns and returned, without standardization,
    as a (timepoints, 1) column vector.
    """
    sphere_masker = input_data.NiftiSpheresMasker([tuple(coords)], radius=6)
    sphere_signals = sphere_masker.fit_transform(img)
    return np.mean(sphere_signals, axis=1, keepdims=True)

def make_psy_cov(runs, ss):
    """Build a binary per-volume condition regressor for the given runs.

    Covariate files
    ``{raw_dir}/{ss}/ses-01/covs/catloc_{id}_run-0{run}_{localizer}.txt``
    (tab-separated onset, duration, value) are read for each run, their
    onsets shifted by the run's position in ``runs`` (184 volumes per run,
    TR 2.0), and the combined events are convolved with the 'spm' HRF model
    via ``compute_regressor``. The result is binarized: values > 0 become 1,
    everything else 0.

    Parameters
    ----------
    runs : sequence of int
        Run numbers, in the order their data are concatenated.
    ss : str
        Subject label containing a '-' (the part after it is used in the
        covariate file name).

    Returns
    -------
    numpy.ndarray
        Array of shape (184 * len(runs), 1). All zeros if no covariate file
        was found or no event starts before the last sampled time.
    """
    temp_dir = f'{raw_dir}/{ss}/ses-01'
    cov_dir = f'{temp_dir}/covs'
    vols_per_run, tr = 184, 2.0
    total_vols = vols_per_run * len(runs)
    times = np.arange(0, total_vols * tr, tr)
    ss_num = ss.split('-')[1]

    # Collect the per-run tables and concatenate once. Concatenating onto an
    # empty object-dtype frame inside the loop relies on deprecated pandas
    # behaviour and can produce an object-dtype array.
    run_covs = []
    for i, rn in enumerate(runs):
        obj_cov_file = f'{cov_dir}/catloc_{ss_num}_run-0{rn}_{localizer}.txt'

        if not os.path.exists(obj_cov_file):
            logging.warning(f'Covariate file not found for run {rn}')
            continue

        obj_cov = pd.read_csv(obj_cov_file, sep='\t', header=None, names=['onset', 'duration', 'value'])
        # Shift onsets to the run's position in the concatenated series.
        obj_cov['onset'] = obj_cov['onset'] + i * vols_per_run * tr
        run_covs.append(obj_cov)

    if run_covs:
        full_cov = pd.concat(run_covs).sort_values(by=['onset']).reset_index(drop=True)
        cov = full_cov.to_numpy(dtype=float)
        # Keep only events that start before the last sampled time point.
        cov = cov[cov[:, 0] < times[-1]]
    else:
        cov = np.empty((0, 3))

    if cov.shape[0] == 0:
        logging.warning('No valid covariate data after filtering. Returning zeros array.')
        return np.zeros((total_vols, 1))

    psy, _ = compute_regressor(cov.T, 'spm', times)
    psy[psy > 0] = 1
    psy[psy <= 0] = 0
    return psy

def extract_cond_ts(ts, cov):
    """Select the samples of ``ts`` that fall in or right after a block.

    A sample is kept when ``cov`` equals 1 at that sample or at the
    preceding sample. The first sample is always kept, because the
    one-sample shift is padded with ``True`` at the front.

    Parameters
    ----------
    ts : numpy.ndarray
        Time series with time on the first axis.
    cov : numpy.ndarray
        Condition indicator with one value per time point (flat or a single
        column).

    Returns
    -------
    numpy.ndarray
        The rows of ``ts`` selected by the mask described above.
    """
    in_block = (cov == 1).reshape(len(cov))
    # Shift the indicator one sample later, padding the front with True.
    after_block = np.concatenate(([True], in_block))[:-1]
    keep = in_block | after_block
    return ts[keep]

def conduct_gca_searchlight():
    """Run a Granger-causality searchlight against right pIPS per subject.

    For every subject in ``subs`` and every run pair in ``run_combos``:
    the two runs are cleaned and concatenated, a binary condition regressor
    is built with ``make_psy_cov``, and for each voxel of the visual-areas
    mask the mean time series of a radius-6 sphere around it is compared
    with the right-pIPS sphere time series using lag-1 Granger causality
    tests in both directions. The stored ``f_diff`` is the F statistic of
    the test on columns ``['pips', 'sphere']`` minus that of the test on
    columns ``['sphere', 'pips']``.
    NOTE(review): statsmodels documents the test as "second column
    Granger-causes first column" — confirm the sign convention of f_diff.

    Outputs, written to ``<subject>/derivatives/gca_searchlight``:
    a CSV with one row per (run pair, voxel) and a NIfTI map of ``f_diff``.
    NOTE(review): the map is re-initialised for every run pair and saved
    once per subject, so it holds the values of the last run pair only —
    confirm whether an average across run pairs was intended.
    """
    logging.info(f'Running GCA with searchlight for {localizer}...')

    # Load whole-brain mask
    whole_brain_mask = nib.load(f'{curr_dir}/roiParcels/mruczek_parcels/binary/all_visual_areas.nii.gz')
    mask_data = whole_brain_mask.get_fdata().astype(bool)
    mask_voxels = list(zip(*np.where(mask_data)))

    summary_columns = ['sub', 'fold', 'task', 'center_x', 'center_y', 'center_z', 'f_diff']

    for ss in subs:
        # Rows are collected in a list and turned into a DataFrame once:
        # DataFrame.append no longer exists in pandas 2.x and was quadratic.
        summary_rows = []

        sub_dir = f'{study_dir}/{ss}/ses-01/'
        temp_dir = f'{raw_dir}/{ss}/ses-01'
        exp_dir = f'{temp_dir}/derivatives/fsl/loc'
        output_dir = f'{sub_dir}/derivatives/gca_searchlight'
        os.makedirs(output_dir, exist_ok=True)

        roi_coords = pd.read_csv(f'{sub_dir}/derivatives/rois/spheres/sphere_coords_hemisphere.csv')

        # Defined up front so the save step below is safe even when there
        # are no run combinations.
        f_diff_3d = np.zeros(mask_data.shape)
        affine = None

        for rcn, rc in enumerate(run_combos):
            logging.info(f"Processing run combination {rc} for subject {ss}")

            filtered_list = []
            for rn in rc:
                curr_run = nib.load(f'{exp_dir}/run-0{rn}/1stLevel.feat/filtered_func_data_reg.nii.gz')
                filtered_list.append(image.clean_img(curr_run))

            img4d = image.concat_imgs(filtered_list)
            affine = curr_run.affine
            logging.info(f"Concatenated image shape: {img4d.shape}")

            psy = make_psy_cov(rc, ss)

            f_diff_3d = np.zeros(mask_data.shape)

            # The number of volumes is the same for every sphere, so the
            # consistency check is done once per run pair.
            if img4d.shape[3] != psy.shape[0]:
                logging.warning(f"Mismatch in volumes: sphere_ts has {img4d.shape[3]}, psy has {psy.shape[0]}")
                continue

            # The reference region (right pIPS) does not depend on the
            # searchlight centre, so look it up and extract it once.
            pips_coords = roi_coords[(roi_coords['index'] == rcn) &
                                     (roi_coords['task'] == 'loc') &
                                     (roi_coords['roi'] == 'pIPS') &
                                     (roi_coords['hemisphere'] == 'right')]

            if pips_coords.empty:
                logging.warning(f"No coordinates found for pIPS, run combo {rc}")
                continue

            pips_ts = extract_roi_sphere(img4d, pips_coords[['x', 'y', 'z']].values.tolist()[0])
            pips_phys = extract_cond_ts(pips_ts, psy)

            # Iterate over all voxels in the brain mask
            for x, y, z in mask_voxels:
                sphere_mask = create_searchlight_sphere((x, y, z), radius=6, mask=mask_data)
                sphere_ts = extract_searchlight_timeseries(img4d, sphere_mask)
                sphere_phys = extract_cond_ts(sphere_ts, psy)

                neural_ts = pd.DataFrame({
                    'sphere': sphere_phys.ravel(),
                    'pips': pips_phys.ravel()
                })

                gc_res_sphere = grangercausalitytests(neural_ts[['pips', 'sphere']], 1, verbose=False)
                gc_res_pips = grangercausalitytests(neural_ts[['sphere', 'pips']], 1, verbose=False)

                f_diff = gc_res_sphere[1][0]['ssr_ftest'][0] - gc_res_pips[1][0]['ssr_ftest'][0]

                f_diff_3d[x, y, z] = f_diff
                summary_rows.append([ss, rcn, 'loc', x, y, z, f_diff])

        logging.info(f'Completed GCA searchlight for subject {ss}')
        sub_summary = pd.DataFrame(summary_rows, columns=summary_columns)
        sub_summary.to_csv(f'{output_dir}/gca_searchlight_summary_{localizer.lower()}.csv', index=False)

        if affine is None:
            logging.warning(f'No run combinations were processed for {ss}; skipping NIfTI output.')
            continue

        # Save the 3D nifti image of f_diff values
        f_diff_3d = f_diff_3d.astype('float64')  # Convert to double precision
        f_diff_3d[np.isnan(f_diff_3d)] = 0  # Replace NaNs with zeros
        f_diff_img = nib.Nifti1Image(f_diff_3d, affine)
        nib.save(f_diff_img, f'{output_dir}/gca_searchlight_f_diff_{localizer.lower()}.nii.gz')

def summarize_gca_searchlight():
    """Aggregate the per-subject searchlight CSVs into a group summary.

    Reads each subject's ``gca_searchlight_summary_<localizer>.csv``, pools
    all rows, and computes the mean and standard deviation of ``f_diff`` for
    every (center_x, center_y, center_z) triple. The table is written to
    ``{results_dir}/gca_searchlight`` together with a NIfTI map of the mean
    values laid out on the grid of the visual-areas mask.

    Returns
    -------
    pandas.DataFrame
        Columns ``x``, ``y``, ``z``, ``mean_f_diff``, ``std_f_diff``.
    """
    logging.info('Creating summary across subjects for searchlight GCA...')

    # Pool every subject's rows into one table.
    per_subject_frames = []
    for ss in subs:
        sub_dir = f'{study_dir}/{ss}/ses-01/'
        data_dir = f'{sub_dir}/derivatives/gca_searchlight'
        subject_frame = pd.read_csv(f'{data_dir}/gca_searchlight_summary_{localizer.lower()}.csv')
        subject_frame['sub'] = ss
        per_subject_frames.append(subject_frame)
    df_all = pd.concat(per_subject_frames, ignore_index=True)

    # Mean and standard deviation of f_diff for each searchlight centre.
    centre_keys = ['center_x', 'center_y', 'center_z']
    df_summary = df_all.groupby(centre_keys)['f_diff'].agg(['mean', 'std']).reset_index()
    df_summary.columns = ['x', 'y', 'z', 'mean_f_diff', 'std_f_diff']

    output_dir = f"{results_dir}/gca_searchlight"
    os.makedirs(output_dir, exist_ok=True)
    summary_file = f"{output_dir}/all_subjects_gca_searchlight_summary_{localizer.lower()}.csv"
    df_summary.to_csv(summary_file, index=False)

    logging.info(f'Summary across subjects completed and saved to {summary_file}')
    print(df_summary.head())

    # Paint the mean values into a volume shaped like the mask and save it.
    whole_brain_mask = nib.load(f'{curr_dir}/roiParcels/mruczek_parcels/binary/all_visual_areas.nii.gz')
    mean_f_diff_3d = np.zeros(whole_brain_mask.shape)
    idx_x = df_summary['x'].to_numpy().astype(int)
    idx_y = df_summary['y'].to_numpy().astype(int)
    idx_z = df_summary['z'].to_numpy().astype(int)
    mean_f_diff_3d[idx_x, idx_y, idx_z] = df_summary['mean_f_diff'].to_numpy()

    mean_f_diff_img = new_img_like(whole_brain_mask, mean_f_diff_3d)
    nib.save(mean_f_diff_img, f'{output_dir}/gca_searchlight_mean_f_diff_{localizer.lower()}.nii.gz')

    return df_summary

# Entry point: run the per-subject searchlight. The group-level summary is
# currently disabled; it reads the CSV files that conduct_gca_searchlight writes.
if __name__ == "__main__":
    conduct_gca_searchlight()
    #summarize_gca_searchlight()