In [1]:
%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt

In [2]:
import h5py
import numpy as np
import scipy as sp
import scipy.stats as stats
import nibabel as nibabel
import pandas as pd
import nibabel.freesurfer.mghformat as mgh
import scipy.io
from itertools import combinations 
import pickle

In [3]:
# Root of the dataset tree; the freesurfer label files are read from under it.
data_dir = '../../../data/'
# Root of the locally generated freesurfer-space files; the per-session
# z-scored beta HDF5 files are read from under it.
local_dir = '../../../local_data/freesurfer/'

In [4]:
# Zero-padded subject identifiers '01' .. '08', used to build file paths.
subjid = ['{:02d}'.format(subject_number) for subject_number in range(1, 9)]

# ROI names; the position of a name in this list is the label value used to
# select vertices from the streams label map (e.g. 'Ventral' is at index 5).
ROI_names = [
    'Unknown',
    'Early',
    'Midventral',
    'Midlateral',
    'Midparietal',
    'Ventral',
    'Lateral',
    'Parietal',
]

# Only images presented exactly this many times are kept.
n_repeats = 3

In [5]:
# Read each subject's behavioural responses table once and record
#   all_ids[sidx]     - the 73KID image id of every trial, in file order
#   max_session[sidx] - the largest SESSION value found in the table
all_ids = []
max_session = np.zeros(len(subjid))
for sidx, sid in enumerate(subjid):

    # Build the path from data_dir so this cell follows the same root as the
    # other cells that read from the dataset tree.
    data = pd.read_csv(data_dir + 'nsddata/ppdata/subj' + sid + '/behav/responses.tsv', sep='\t')

    max_session[sidx] = np.max(np.array(data['SESSION']))

    all_ids.append(np.array(data['73KID']))

In [6]:
# Show the per-subject arrays of trial image ids (73KID) as a quick check.
all_ids

[array([46003, 61883,   829, ..., 53168,  1944,  5034]),
 array([46003, 42020, 22500, ..., 61376, 42648, 69768]),
 array([46003, 19257, 36386, ..., 56937, 28438, 42959]),
 array([46003, 23082,  8031, ..., 67073, 12918,  3388]),
 array([46003,  5737,  9204, ..., 44063, 53238, 54913]),
 array([46003, 21397, 18925, ..., 58176, 18756, 46521]),
 array([46003, 16507,  9387, ..., 34541, 31244, 55231]),
 array([46003, 57484, 41828, ..., 54394,    30, 23135])]

In [7]:
# For every subject, collect the image ids that occur exactly n_repeats times
# among that subject's trials.
which_reps = []
for sidx, sid in enumerate(subjid):
    # Only the unique ids and their counts are needed; the first-occurrence
    # indices were never used, so they are no longer requested.
    vals, count = np.unique(all_ids[sidx], return_counts=True)
    which_reps.append(vals[count == n_repeats])

# The id array of the subject with the fewest fully repeated images.
# Note this is the array itself, not its length.
least_trials = min(which_reps, key=len)

In [8]:
# Per subject:
#   mask_3reps[sidx]    - boolean mask over trials, True where the trial's
#                         image id is one of the ids repeated n_repeats times
#   id_nums_3reps[sidx] - the image ids of the trials selected by that mask
id_nums_3reps = []
mask_3reps = []
for sidx, sid in enumerate(subjid):

    # all_ids[sidx] already holds this subject's 73KID column, so the
    # responses table does not have to be read from disk a second time.
    mask_3reps.append(np.isin(all_ids[sidx], which_reps[sidx]))
    id_nums_3reps.append(all_ids[sidx][mask_3reps[sidx]])


In [9]:
# Show the per-subject image ids that remain after applying the repeat mask.
id_nums_3reps

[array([46003, 61883,   829, ..., 53168,  1944,  5034]),
 array([46003, 42020, 22500, ..., 61376, 42648, 69768]),
 array([19257, 36386, 44343, ..., 66372, 56937, 42959]),
 array([23082,  8031, 36381, ..., 67739, 67073,  3388]),
 array([46003,  5737,  9204, ..., 44063, 53238, 54913]),
 array([21397, 18925, 27158, ..., 13035, 58176, 46521]),
 array([46003, 16507,  9387, ..., 34541, 31244, 55231]),
 array([57484, 41828, 69686, ..., 57428, 54394, 23135])]

In [10]:
# Load each subject's right-hemisphere "streams" label file and keep it as a
# 1-D array (one label value per entry along the first axis).
rh_streams = []
for sidx, sid in enumerate(subjid):
    streams_path = f'{data_dir}nsddata/freesurfer/subj{sid}/label/rh.streams.mgz'
    streams_img = mgh.load(streams_path)
    # The two trailing axes are indexed at 0, leaving only the first axis.
    streams_labels = streams_img.get_fdata()[:, 0, 0]
    rh_streams.append(streams_labels)

In [12]:
#right hemisphere
# For every subject: read the z-scored betas of each session, keep only the
# trials selected by mask_3reps, pull out the ventral / lateral / parietal
# vertices, group the trials by image id, split them by repeat and pickle the
# result to one file per subject.

# Each session file is matched to this many consecutive rows of the trial mask.
trials_per_session = 750
# Label values in rh_streams, in the order the ROIs are written to disk:
# ventral, lateral, parietal (the positions of those names in ROI_names).
hva_labels = (5, 6, 7)

for sidx, sid in enumerate(subjid):

    print(sidx)
    mask = mask_3reps[sidx]

    # The vertex selectors do not change between sessions, so build them once
    # per subject instead of once per session.
    roi_vertices = [rh_streams[sidx] == label for label in hva_labels]
    # One list of per-session pieces for each ROI, concatenated once at the end.
    roi_chunks = [[] for _ in hva_labels]

    #get all betas across all sessions
    for sess in range(1, int(max_session[sidx]) + 1):

        # Session numbers are zero-padded to two digits in the file names.
        betas_path = (local_dir + 'subj' + sid
                      + '/betas/rh.zscore_betas_session' + f'{sess:02d}' + '.hdf5')
        sess_mask = mask[(sess - 1) * trials_per_session:sess * trials_per_session]

        # The context manager closes the HDF5 file even if reading fails.
        with h5py.File(betas_path, 'r') as raw_betas:
            sess_betas = raw_betas['zscore_betas'][:][sess_mask]

        for chunks, vertices in zip(roi_chunks, roi_vertices):
            chunks.append(sess_betas[:, vertices])

        del sess_betas

    # Sort trials by descending image id so that the n_repeats presentations
    # of each image sit next to each other.
    # NOTE(review): argsort uses its default, non-stable sort here, so the
    # order of the presentations within one image id is not guaranteed to
    # follow presentation order - confirm that downstream code treats the
    # repeats as interchangeable.
    trial_order = id_nums_3reps[sidx].argsort()[::-1]

    betas_by_repeat = []
    for chunks in roi_chunks:
        roi_betas = np.concatenate(chunks)[trial_order]
        # Release the per-session pieces before the next ROI is assembled.
        del chunks[:]
        # Every image occupies n_repeats consecutive rows, so a stride of
        # n_repeats starting at r picks the r-th row of every image.
        betas_by_repeat.append([roi_betas[r::n_repeats] for r in range(n_repeats)])
        del roi_betas

    #save out
    save_file = '../../../local_data/processed/subj' + sid +'_all_HVA_zscored_betas.data'

    with open(save_file, 'wb') as filehandle:
        # store the data as binary data stream:
        # [ventral, lateral, parietal], each a list of n_repeats arrays
        pickle.dump(betas_by_repeat, filehandle)

    del betas_by_repeat, roi_chunks


0
1
2
3
4
5
6
7


In [None]:
# Show the largest SESSION value found for each subject.
max_session