# Notebook to split all betas into individual .npy files


In [13]:
import numpy as np
import pickle as pkl
import os
from tqdm import tqdm

## Transform NSD betas

In [14]:
# Function that loads a pkl file with betas and stores each vector separately

def nsd_betas_to_indiv_files(betas_path, 
                         subs=None, 
                         rois=None):
    """Split NSD ROI beta pickles into one ``.npy`` file per stimulus repetition.

    For every subject and ROI, the pickle
    ``<betas_path>/<sub>/prepared_allvoxel_pkl/<roi>_betas-GLMsingle_type-typeb_z=1.pkl``
    is loaded and each entry of ``data['data_allvoxel'][s][r]`` is written to
    ``<betas_path>/<sub>/indiv_npys/<roi>_betas-GLMsingle_type-typeb_z=1/<id>_<r>.npy``,
    where ``<id>`` is ``idxs[s] - 1`` zero-padded to six digits and ``idxs`` is
    the first element of the subject's ``events_imgtag-73k_id.pkl``.

    Any existing content of the output directory is deleted before writing.

    Parameters
    ----------
    betas_path : str
        Root directory holding one sub-directory per subject.
    subs : list of str, optional
        Subjects to process. Defaults to every entry of ``betas_path``, sorted.
    rois : list of str, optional
        ROI name prefixes to process. When omitted, the ROIs are discovered
        separately for each subject from the file names in its
        ``prepared_allvoxel_pkl`` directory (text before the first ``_``).

    Raises
    ------
    FileNotFoundError
        If an expected pickle file is missing.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code; only run this on trusted files.
    """
    if subs is None:
        subs = sorted(os.listdir(betas_path))

    for sub in subs:
        # Load pickle with ids of stimuli
        with open(f'{betas_path}/{sub}/events_imgtag-73k_id.pkl', 'rb') as f:
            idxs = pkl.load(f)[0]
        print(f'Processing {sub}...')

        pkl_dir = os.path.join(betas_path, sub, 'prepared_allvoxel_pkl')
        # Resolve the ROI list per subject WITHOUT rebinding `rois`: otherwise
        # the ROIs discovered for the first subject would be reused for all
        # later ones. The set drops duplicate prefixes.
        if rois is None:
            sub_rois = sorted({name.split('_')[0] for name in os.listdir(pkl_dir)})
        else:
            sub_rois = rois

        for roi in sub_rois:
            roi_pkl = roi+'_betas-GLMsingle_type-typeb_z=1.pkl'
            print(f'Processing {roi_pkl}...')
            with open(os.path.join(pkl_dir, roi_pkl), 'rb') as f:
                data = pkl.load(f)
            d = data['data_allvoxel']
            # Output folder is named after the pickle minus its '.pkl' suffix.
            savedir = os.path.join(betas_path, sub, 'indiv_npys', roi_pkl[:-4])

            # If savedir exists, delete all its contents so files from a
            # previous run cannot linger next to the fresh ones.
            if os.path.exists(savedir):
                for stale in os.listdir(savedir):
                    os.remove(os.path.join(savedir, stale))
            else:
                os.makedirs(savedir, exist_ok=True)

            # Wrapping `d` itself (not the enumerate object) lets tqdm read
            # the length and show a real progress bar when `d` is sized.
            for s, stim in enumerate(tqdm(d)):
                for r, rep in enumerate(stim):
                    # TODO: Check if indexes match correctly here
                    # (assumes idxs[s] is the 1-based id of the s-th stimulus in d).
                    npy_name = f'{idxs[s]-1:06d}_{r}.npy'
                    np.save(os.path.join(savedir, npy_name), rep)


In [12]:
# Export the lPPA and rPPA betas of every subject folder found under the NSD betas directory.
nsd_betas_to_indiv_files(
    betas_path='../data/betas_nsd',
    rois=['lPPA', 'rPPA'],
)

Processing sub01...
Processing lPPA_betas-GLMsingle_type-typeb_z=1.pkl...
73000


0it [00:00, ?it/s]

10000it [00:02, 3873.01it/s]


Processing rPPA_betas-GLMsingle_type-typeb_z=1.pkl...
73000


10000it [00:02, 4220.84it/s]


Processing sub02...
Processing lPPA_betas-GLMsingle_type-typeb_z=1.pkl...
72974


10000it [00:02, 4407.84it/s]


Processing rPPA_betas-GLMsingle_type-typeb_z=1.pkl...
72974


10000it [00:02, 4160.80it/s]


Processing sub03...
Processing lPPA_betas-GLMsingle_type-typeb_z=1.pkl...
72990


6234it [00:01, 4064.81it/s]


Processing rPPA_betas-GLMsingle_type-typeb_z=1.pkl...
72990


6234it [00:01, 4038.98it/s]


Processing sub04...
Processing lPPA_betas-GLMsingle_type-typeb_z=1.pkl...
72980


5445it [00:01, 4483.18it/s]


Processing rPPA_betas-GLMsingle_type-typeb_z=1.pkl...
72980


5445it [00:01, 4386.73it/s]


Processing sub05...
Processing lPPA_betas-GLMsingle_type-typeb_z=1.pkl...
72994


10000it [00:02, 3942.51it/s]


Processing rPPA_betas-GLMsingle_type-typeb_z=1.pkl...
72994


10000it [00:02, 4377.79it/s]


Processing sub06...
Processing lPPA_betas-GLMsingle_type-typeb_z=1.pkl...
72985


6234it [00:01, 4001.56it/s]


Processing rPPA_betas-GLMsingle_type-typeb_z=1.pkl...
72985


6234it [00:01, 4356.83it/s]


Processing sub07...
Processing lPPA_betas-GLMsingle_type-typeb_z=1.pkl...
72999


10000it [00:02, 4225.86it/s]


Processing rPPA_betas-GLMsingle_type-typeb_z=1.pkl...
72999


10000it [00:02, 4457.17it/s]


Processing sub08...
Processing lPPA_betas-GLMsingle_type-typeb_z=1.pkl...
72997


5445it [00:01, 4082.65it/s]


Processing rPPA_betas-GLMsingle_type-typeb_z=1.pkl...
72997


5445it [00:01, 4138.24it/s]


## Transform BMD betas

In [15]:


def bmd_betas_to_indiv_files(betas_path, 
                             subs=None,
                             rois=None):
    """Split BMD ROI beta pickles into one ``.npy`` file per stimulus repetition.

    For every subject and ROI, the pickle
    ``<betas_path>/<sub>/prepared_allvoxel_pkl/<roi>_betas-GLMsingle_type-typed_z=1.pkl``
    is loaded. Each ``[s][r]`` entry of ``train_data_allvoxel`` and then of
    ``test_data_allvoxel`` is written to
    ``<betas_path>/<sub>/indiv_npys/<roi>_betas-GLMsingle_type-typed_z=1/<n>_<r>.npy``.
    ``<n>`` is a 1-based, four-digit stimulus number: train stimuli are
    numbered ``1..len(train)`` and test stimuli continue from ``len(train)+1``.

    Existing files in the output directory are not removed; files with the
    same name are overwritten.

    Parameters
    ----------
    betas_path : str
        Root directory holding one sub-directory per subject.
    subs : list of str, optional
        Subjects to process. Defaults to every entry of ``betas_path``, sorted.
    rois : list of str, optional
        ROI name prefixes to process. When omitted, the ROIs are discovered
        separately for each subject from the file names in its
        ``prepared_allvoxel_pkl`` directory (text before the first ``_``).

    Raises
    ------
    FileNotFoundError
        If an expected pickle file is missing.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code; only run this on trusted files.
    """
    if subs is None:
        subs = sorted(os.listdir(betas_path))

    for sub in subs:
        print(f'Processing {sub}...')

        pkl_dir = os.path.join(betas_path, sub, 'prepared_allvoxel_pkl')
        # Resolve the ROI list per subject WITHOUT rebinding `rois`: otherwise
        # the ROIs discovered for the first subject would be reused for all
        # later ones. The set drops duplicate prefixes.
        if rois is None:
            sub_rois = sorted({name.split('_')[0] for name in os.listdir(pkl_dir)})
        else:
            sub_rois = rois

        for roi in sub_rois:
            roi_pkl = roi+'_betas-GLMsingle_type-typed_z=1.pkl'
            print(f'Processing {roi_pkl}...')
            with open(os.path.join(pkl_dir, roi_pkl), 'rb') as f:
                data = pkl.load(f)

            print(data.keys())

            d_train = data['train_data_allvoxel']
            d_test = data['test_data_allvoxel']

            # Output folder is named after the pickle minus its '.pkl' suffix.
            savedir = os.path.join(betas_path, sub, 'indiv_npys', roi_pkl[:-4])
            os.makedirs(savedir, exist_ok=True)

            # Train and test share one numbering scheme, equivalent to
            # enumerating their concatenation, without building the
            # concatenated array.
            for offset, split in ((0, d_train), (len(d_train), d_test)):
                for s, stim in enumerate(split, start=offset + 1):
                    for r, rep in enumerate(stim):
                        npy_name = f'{s:04d}_{r}.npy'
                        np.save(os.path.join(savedir, npy_name), rep)


In [14]:
# Export the lPPA and rPPA betas of every subject folder found under the impulse betas directory.
bmd_betas_to_indiv_files(
    betas_path='../data/betas_impulse',
    rois=['lPPA', 'rPPA'],
)

Processing sub01...
Processing lPPA_betas-GLMsingle_type-typed_z=1.pkl...
dict_keys(['test_data_allvoxel', 'test_noiseceiling_allvoxel', 'train_data_allvoxel', 'train_noiseceiling_allvoxel', 'roi_indices_fullvolume'])
Processing rPPA_betas-GLMsingle_type-typed_z=1.pkl...
dict_keys(['test_data_allvoxel', 'test_noiseceiling_allvoxel', 'train_data_allvoxel', 'train_noiseceiling_allvoxel', 'roi_indices_fullvolume'])
Processing sub02...
Processing lPPA_betas-GLMsingle_type-typed_z=1.pkl...
dict_keys(['test_data_allvoxel', 'test_noiseceiling_allvoxel', 'train_data_allvoxel', 'train_noiseceiling_allvoxel', 'roi_indices_fullvolume'])
Processing rPPA_betas-GLMsingle_type-typed_z=1.pkl...
dict_keys(['test_data_allvoxel', 'test_noiseceiling_allvoxel', 'train_data_allvoxel', 'train_noiseceiling_allvoxel', 'roi_indices_fullvolume'])
Processing sub03...
Processing lPPA_betas-GLMsingle_type-typed_z=1.pkl...
dict_keys(['test_data_allvoxel', 'test_noiseceiling_allvoxel', 'train_data_allvoxel', 'train_n

In [16]:
# Split the CIFTI betas into individual files.
# The CIFTI beta pickles share the format of the impulse/volumetric betas,
# so the same BMD export function is reused here.
bmd_betas_to_indiv_files(
    betas_path='../data/betas_cifti_bmd',
    rois=['Group41'],
)
                         

Processing sub01...
Processing Group41_betas-GLMsingle_type-typed_z=1.pkl...
dict_keys(['train_data_allvoxel', 'test_data_allvoxel', 'train_noiseceiling_allvoxel', 'test_noiseceiling_allvoxel', 'roi_indices_hcp'])
Processing sub02...
Processing Group41_betas-GLMsingle_type-typed_z=1.pkl...
dict_keys(['train_data_allvoxel', 'test_data_allvoxel', 'train_noiseceiling_allvoxel', 'test_noiseceiling_allvoxel', 'roi_indices_hcp'])
Processing sub03...
Processing Group41_betas-GLMsingle_type-typed_z=1.pkl...
dict_keys(['train_data_allvoxel', 'test_data_allvoxel', 'train_noiseceiling_allvoxel', 'test_noiseceiling_allvoxel', 'roi_indices_hcp'])
Processing sub04...
Processing Group41_betas-GLMsingle_type-typed_z=1.pkl...
dict_keys(['train_data_allvoxel', 'test_data_allvoxel', 'train_noiseceiling_allvoxel', 'test_noiseceiling_allvoxel', 'roi_indices_hcp'])
Processing sub05...
Processing Group41_betas-GLMsingle_type-typed_z=1.pkl...
dict_keys(['train_data_allvoxel', 'test_data_allvoxel', 'train_nois

## Transform HAD cifti betas

In [18]:

def had_betas_to_indiv_files(betas_path, 
                         subs=None, 
                         rois=None):
    """Split HAD ROI beta pickles into one ``.npy`` file per stimulus.

    For every subject and ROI, the pickle
    ``<betas_path>/<sub>/prepared_allvoxel_pkl/<roi>_betas-GLMsingle_type-typeb_z=1.pkl``
    is loaded and the entries of ``data['data_allvoxel'][s]`` are written to
    ``<betas_path>/<sub>/indiv_npys/<roi>_betas-GLMsingle_type-typeb_z=1/<name>.npy``,
    where ``<name>`` is ``data['stim_order'][s]``.

    Existing files in the output directory are not removed; files with the
    same name are overwritten.

    Parameters
    ----------
    betas_path : str
        Root directory holding one sub-directory per subject.
    subs : list of str, optional
        Subjects to process. Defaults to every entry of ``betas_path``, sorted.
    rois : list of str, optional
        ROI name prefixes to process. When omitted, the ROIs are discovered
        separately for each subject from the file names in its
        ``prepared_allvoxel_pkl`` directory (text before the first ``_``).

    Raises
    ------
    FileNotFoundError
        If an expected pickle file is missing.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code; only run this on trusted files.
    """
    if subs is None:
        subs = sorted(os.listdir(betas_path))

    for sub in subs:
        print(f'Processing {sub}...')

        pkl_dir = os.path.join(betas_path, sub, 'prepared_allvoxel_pkl')
        # Resolve the ROI list per subject WITHOUT rebinding `rois`: otherwise
        # the ROIs discovered for the first subject would be reused for all
        # later ones. The set drops duplicate prefixes.
        if rois is None:
            sub_rois = sorted({name.split('_')[0] for name in os.listdir(pkl_dir)})
        else:
            sub_rois = rois

        for roi in sub_rois:
            roi_pkl = roi+'_betas-GLMsingle_type-typeb_z=1.pkl'
            print(f'Processing {roi_pkl}...')
            with open(os.path.join(pkl_dir, roi_pkl), 'rb') as f:
                data = pkl.load(f)
            d = data['data_allvoxel']
            vid_names = data['stim_order']
            # Output folder is named after the pickle minus its '.pkl' suffix.
            savedir = os.path.join(betas_path, sub, 'indiv_npys', roi_pkl[:-4])
            os.makedirs(savedir, exist_ok=True)
            for s, stim in enumerate(d):
                # NOTE(review): the file name carries no repetition index, so if
                # a stimulus holds several repetitions each save overwrites the
                # previous one and only the last survives. Presumably there is
                # exactly one repetition per stimulus -- confirm.
                npy_name = f'{vid_names[s]}.npy'
                for rep in stim:
                    np.save(os.path.join(savedir, npy_name), rep)

In [19]:
# Export the Group41 betas of every subject folder found under the HAD CIFTI betas directory.
had_betas_to_indiv_files(
    betas_path='../data/betas_cifti_had',
    rois=['Group41'],
)

Processing sub01...
Processing Group41_betas-GLMsingle_type-typeb_z=1.pkl...
Processing sub02...
Processing Group41_betas-GLMsingle_type-typeb_z=1.pkl...
Processing sub03...
Processing Group41_betas-GLMsingle_type-typeb_z=1.pkl...
Processing sub04...
Processing Group41_betas-GLMsingle_type-typeb_z=1.pkl...
Processing sub05...
Processing Group41_betas-GLMsingle_type-typeb_z=1.pkl...
Processing sub06...
Processing Group41_betas-GLMsingle_type-typeb_z=1.pkl...
Processing sub07...
Processing Group41_betas-GLMsingle_type-typeb_z=1.pkl...
Processing sub08...
Processing Group41_betas-GLMsingle_type-typeb_z=1.pkl...
Processing sub09...
Processing Group41_betas-GLMsingle_type-typeb_z=1.pkl...
Processing sub10...
Processing Group41_betas-GLMsingle_type-typeb_z=1.pkl...
Processing sub11...
Processing Group41_betas-GLMsingle_type-typeb_z=1.pkl...
Processing sub12...
Processing Group41_betas-GLMsingle_type-typeb_z=1.pkl...
Processing sub13...
Processing Group41_betas-GLMsingle_type-typeb_z=1.pkl...