# Notebook to split the betas stored in pkl files into individual npy files


In [1]:
import numpy as np
import pickle as pkl
import os
from tqdm import tqdm

## Transform NSD betas

In [8]:
# Function that loads a pkl file with betas and stores each vector separately

def nsd_betas_to_indiv_files(betas_path, 
                         subs=None, 
                         rois=None):
    """Split the NSD beta pickles into one ``.npy`` file per stimulus repetition.

    For every subject and ROI, the pickle
    ``<betas_path>/<sub>/prepared_allvoxel_pkl/<roi>_betas-GLMsingle_type-typeb_z=1.pkl``
    is loaded and each entry of ``data['data_allvoxel'][s][r]`` is saved to
    ``<betas_path>/<sub>/indiv_npys/<roi>_betas-GLMsingle_type-typeb_z=1/``
    as ``<id>_<r>.npy``, where ``<id>`` is ``idxs[s] - 1`` zero-padded to six
    digits and ``idxs`` is the first element of the object pickled in
    ``<betas_path>/<sub>/events_imgtag-73k_id.pkl``.

    Any files already present in an output directory are deleted first.

    Args:
        betas_path: Root directory holding one sub-directory per subject.
        subs: Subject directory names to process. ``None`` processes every
            directory found directly under ``betas_path``.
        rois: ROI names (file-name prefix before the first ``_``) to process.
            ``None`` discovers the ROIs of each subject separately from the
            contents of its ``prepared_allvoxel_pkl`` directory.

    Note:
        The inputs are read with ``pickle.load``; only run this on trusted
        files.
    """
    if subs is None:
        # Only directories can be subjects; skip stray files (e.g. .DS_Store).
        subs = sorted(
            entry for entry in os.listdir(betas_path)
            if os.path.isdir(os.path.join(betas_path, entry))
        )

    for sub in subs:
        # Load pickle with ids of stimuli
        with open(f'{betas_path}/{sub}/events_imgtag-73k_id.pkl', 'rb') as f:
            idxs = pkl.load(f)[0]
        print(f'Processing {sub}...')

        pkl_dir = os.path.join(betas_path, sub, 'prepared_allvoxel_pkl')
        if rois is None:
            # Discover ROIs per subject without rebinding ``rois``; otherwise
            # the first subject's list would be reused for all later subjects.
            # The set removes duplicates when several files share an ROI prefix.
            sub_rois = sorted({name.split('_')[0] for name in os.listdir(pkl_dir)})
        else:
            sub_rois = rois

        for roi in sub_rois:
            roi_pkl = roi+'_betas-GLMsingle_type-typeb_z=1.pkl'
            print(f'Processing {roi_pkl}...')
            with open(os.path.join(pkl_dir, roi_pkl), 'rb') as f:
                data = pkl.load(f)
            d = data['data_allvoxel']
            # Output directory is named after the pickle minus its '.pkl' suffix.
            savedir = os.path.join(betas_path, sub, 'indiv_npys', roi_pkl[:-4])

            # If savedir exists, delete all its contents so files from a
            # previous run cannot be mixed with the new ones.
            if os.path.exists(savedir):
                for stale in os.listdir(savedir):
                    os.remove(os.path.join(savedir, stale))
            else:
                os.makedirs(savedir, exist_ok=True)

            # Sanity print of the largest stimulus id for this subject.
            print(max(idxs))
            # Wrapping ``d`` (not the enumerate object) lets tqdm read its
            # length and display a total.
            for s, stim in enumerate(tqdm(d)):
                for r, rep in enumerate(stim):
                    # Presumably idxs holds 1-based 73k image ids, hence the -1.
                    npy_name = f'{idxs[s]-1:06d}_{r}.npy' #TODO: Check if indexes match correctly here
                    np.save(os.path.join(savedir, npy_name), rep)


In [9]:
# Split the BMDgeneral ROI betas for every subject found under ../data/betas_nsd
# (subs is left at its default of None, so all subjects are processed).
nsd_betas_to_indiv_files('../data/betas_nsd', rois=['BMDgeneral'])

Processing sub01...
Processing BMDgeneral_betas-GLMsingle_type-typeb_z=1.pkl...
73000


10000it [00:44, 222.79it/s]


Processing sub02...
Processing BMDgeneral_betas-GLMsingle_type-typeb_z=1.pkl...
72974


10000it [00:36, 276.94it/s]


Processing sub03...
Processing BMDgeneral_betas-GLMsingle_type-typeb_z=1.pkl...
72990


6234it [00:23, 266.35it/s]


Processing sub04...
Processing BMDgeneral_betas-GLMsingle_type-typeb_z=1.pkl...
72980


5445it [00:19, 280.40it/s]


Processing sub05...
Processing BMDgeneral_betas-GLMsingle_type-typeb_z=1.pkl...
72994


10000it [00:37, 264.76it/s]


Processing sub06...
Processing BMDgeneral_betas-GLMsingle_type-typeb_z=1.pkl...
72985


6234it [00:23, 267.30it/s]


Processing sub07...
Processing BMDgeneral_betas-GLMsingle_type-typeb_z=1.pkl...
72999


10000it [00:35, 281.03it/s]


Processing sub08...
Processing BMDgeneral_betas-GLMsingle_type-typeb_z=1.pkl...
72997


5445it [00:19, 285.27it/s]


## Transform BMD betas

In [4]:


def bmd_betas_to_indiv_files(betas_path, 
                             subs=None,
                             rois=None):
    """Split the BMD beta pickles into one ``.npy`` file per stimulus repetition.

    For every subject and ROI, the pickle
    ``<betas_path>/<sub>/prepared_allvoxel_pkl/<roi>_betas-GLMsingle_type-typed_z=1.pkl``
    is loaded and each entry ``[s][r]`` of its ``'train_data_allvoxel'`` and
    ``'test_data_allvoxel'`` values is saved to
    ``<betas_path>/<sub>/indiv_npys/<roi>_betas-GLMsingle_type-typed_z=1/``
    as ``<id>_<r>.npy``. ``<id>`` is a 1-based, four-digit running index: the
    train entries come first and the test entries continue the numbering
    after the last train entry.

    Existing files in the output directory are kept and overwritten on name
    collision.

    Args:
        betas_path: Root directory holding one sub-directory per subject.
        subs: Subject directory names to process. ``None`` processes every
            directory found directly under ``betas_path``.
        rois: ROI names (file-name prefix before the first ``_``) to process.
            ``None`` discovers the ROIs of each subject separately from the
            contents of its ``prepared_allvoxel_pkl`` directory.

    Note:
        The inputs are read with ``pickle.load``; only run this on trusted
        files.
    """
    if subs is None:
        # Only directories can be subjects; skip stray files (e.g. .DS_Store).
        subs = sorted(
            entry for entry in os.listdir(betas_path)
            if os.path.isdir(os.path.join(betas_path, entry))
        )

    for sub in subs:
        print(f'Processing {sub}...')

        pkl_dir = os.path.join(betas_path, sub, 'prepared_allvoxel_pkl')
        if rois is None:
            # Discover ROIs per subject without rebinding ``rois``; otherwise
            # the first subject's list would be reused for all later subjects.
            # The set removes duplicates when several files share an ROI prefix.
            sub_rois = sorted({name.split('_')[0] for name in os.listdir(pkl_dir)})
        else:
            sub_rois = rois

        for roi in sub_rois:
            roi_pkl = roi+'_betas-GLMsingle_type-typed_z=1.pkl'
            print(f'Processing {roi_pkl}...')
            with open(os.path.join(pkl_dir, roi_pkl), 'rb') as f:
                data = pkl.load(f)

            print(data.keys())

            d_train = data['train_data_allvoxel']
            d_test = data['test_data_allvoxel']

            # Output directory is named after the pickle minus its '.pkl' suffix.
            savedir = os.path.join(betas_path, sub, 'indiv_npys', roi_pkl[:-4])
            os.makedirs(savedir, exist_ok=True)

            # Train entries are numbered from 1; test entries continue the
            # numbering right after the last train entry.
            for split, offset in ((d_train, 0), (d_test, len(d_train))):
                for s, stim in enumerate(split):
                    for r, rep in enumerate(stim):
                        npy_name = f'{s+1+offset:04d}_{r}.npy'
                        np.save(os.path.join(savedir, npy_name), rep)


In [5]:
# Split the BMDgeneral ROI betas for every subject found under
# ../data/betas_impulse (subs is left at its default of None).
bmd_betas_to_indiv_files('../data/betas_impulse', 
                     rois=['BMDgeneral'])

Processing BMDgeneral_betas-GLMsingle_type-typed_z=1.pkl...
dict_keys(['test_data_allvoxel', 'test_noiseceiling_allvoxel', 'train_data_allvoxel', 'train_noiseceiling_allvoxel', 'roi_indices_fullvolume'])


1000it [00:03, 310.19it/s]
102it [00:01, 73.92it/s]


Processing BMDgeneral_betas-GLMsingle_type-typed_z=1.pkl...
dict_keys(['test_data_allvoxel', 'test_noiseceiling_allvoxel', 'train_data_allvoxel', 'train_noiseceiling_allvoxel', 'roi_indices_fullvolume'])


1000it [00:03, 329.26it/s]
102it [00:01, 76.96it/s]


Processing BMDgeneral_betas-GLMsingle_type-typed_z=1.pkl...
dict_keys(['test_data_allvoxel', 'test_noiseceiling_allvoxel', 'train_data_allvoxel', 'train_noiseceiling_allvoxel', 'roi_indices_fullvolume'])


1000it [00:02, 333.54it/s]
102it [00:01, 77.90it/s]


Processing BMDgeneral_betas-GLMsingle_type-typed_z=1.pkl...
dict_keys(['test_data_allvoxel', 'test_noiseceiling_allvoxel', 'train_data_allvoxel', 'train_noiseceiling_allvoxel', 'roi_indices_fullvolume'])


1000it [00:03, 322.26it/s]
102it [00:01, 76.28it/s]


Processing BMDgeneral_betas-GLMsingle_type-typed_z=1.pkl...
dict_keys(['test_data_allvoxel', 'test_noiseceiling_allvoxel', 'train_data_allvoxel', 'train_noiseceiling_allvoxel', 'roi_indices_fullvolume'])


1000it [00:03, 324.42it/s]
102it [00:01, 72.30it/s]


Processing BMDgeneral_betas-GLMsingle_type-typed_z=1.pkl...
dict_keys(['test_data_allvoxel', 'test_noiseceiling_allvoxel', 'train_data_allvoxel', 'train_noiseceiling_allvoxel', 'roi_indices_fullvolume'])


1000it [00:03, 325.46it/s]
102it [00:01, 76.88it/s]


Processing BMDgeneral_betas-GLMsingle_type-typed_z=1.pkl...
dict_keys(['test_data_allvoxel', 'test_noiseceiling_allvoxel', 'train_data_allvoxel', 'train_noiseceiling_allvoxel', 'roi_indices_fullvolume'])


1000it [00:03, 328.13it/s]
102it [00:01, 78.31it/s]


Processing BMDgeneral_betas-GLMsingle_type-typed_z=1.pkl...
dict_keys(['test_data_allvoxel', 'test_noiseceiling_allvoxel', 'train_data_allvoxel', 'train_noiseceiling_allvoxel', 'roi_indices_fullvolume'])


1000it [00:03, 331.01it/s]
102it [00:01, 76.19it/s]


Processing BMDgeneral_betas-GLMsingle_type-typed_z=1.pkl...
dict_keys(['test_data_allvoxel', 'test_noiseceiling_allvoxel', 'train_data_allvoxel', 'train_noiseceiling_allvoxel', 'roi_indices_fullvolume'])


1000it [00:02, 344.34it/s]
102it [00:01, 76.27it/s]


Processing BMDgeneral_betas-GLMsingle_type-typed_z=1.pkl...
dict_keys(['test_data_allvoxel', 'test_noiseceiling_allvoxel', 'train_data_allvoxel', 'train_noiseceiling_allvoxel', 'roi_indices_fullvolume'])


1000it [00:02, 339.41it/s]
102it [00:01, 75.32it/s]
