In [3]:
%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt

In [2]:
#import packages
import argparse
import pandas as pd
import h5py
import numpy as np
import scipy as sp
import scipy.stats as stats
import scipy.io
import nibabel.freesurfer.mghformat as mgh
import scipy.io
import itertools 
import pickle
import sys

utils_dir = '/oak/stanford/groups/kalanit/biac2/kgs/projects/Dawn/NSD/code/streams/utils/'
sys.path.append(utils_dir)

from rsm_utils import get_flat_lower_tri, get_reliability_data 

data_dir = '/oak/stanford/groups/kalanit/biac2/kgs/projects/Dawn/NSD/data/'
local_data_dir = '/oak/stanford/groups/kalanit/biac2/kgs/projects/Dawn/NSD/local_data/'

def fast_pearson(x,y):
    #faster, vectorized version
    xz = x - x.mean(axis=0)
    yz = y - y.mean(axis=0)
    xzss = (xz * xz).sum(axis=0)
    yzss = (yz * yz).sum(axis=0)
    r = np.matmul(xz.transpose(), yz) / (np.sqrt(np.outer(xzss, yzss)) + np.finfo(float).eps) #precision issues
    return np.maximum(np.minimum(r, 1.0), -1.0) #for precision issues

In [4]:
subjid = '06'
hemi = 'rh'

In [5]:
print(subjid)

n_repeats = 3

#get trial ids and mask        
all_ids = []
max_session = np.zeros(len([subjid]))
for sidx, sid in enumerate([subjid]):

    data = pd.read_csv(data_dir+'nsddata/ppdata/subj'+ sid +'/behav/responses.tsv', sep='\t')

    max_session[sidx] = np.max(np.array(data['SESSION'])) 

    all_ids.append(np.array(data['73KID']))

which_reps = []
for sidx, sid in enumerate([subjid]):
    vals, idx_start, count = np.unique(all_ids[sidx], return_counts=True,
                                    return_index=True)
    which_reps.append(vals[count == n_repeats])

least_trials = min(which_reps, key=len)

id_nums_3reps = []
mask_3reps = []
for sidx, sid in enumerate([subjid]):

    data = pd.read_csv(data_dir+'nsddata/ppdata/subj'+ sid +'/behav/responses.tsv', sep='\t')

    mask_3reps.append(np.isin(all_ids[sidx],which_reps[sidx]))
    id_nums_3reps.append(np.array(data['73KID'])[mask_3reps[sidx]])

arr1inds = id_nums_3reps[sidx].argsort()


06


In [6]:
rh_streams = []
for sidx, sid in enumerate([subjid]):
    mgh_file = mgh.load(data_dir+'nsddata/freesurfer/subj'+ sid +'/label/rh.streams_shrink5.mgz')
    rh_streams.append(mgh_file.get_fdata()[:,0,0])

In [7]:
stream_idx = np.where(rh_streams[0] != 0)

In [8]:
len(stream_idx[0])

50244

In [9]:
mask = mask_3reps[sidx]
sess = 1
idx = '0' + str(sess)

raw_betas = h5py.File(local_data_dir+'freesurfer/subj'+sid+'/betas/'+ hemi +'.zscore_betas_session'+idx+'.hdf5','r')

sess_betas = raw_betas['zscore_betas'][:][mask[(sess-1)*750:sess*750]]

In [10]:
len(sess_betas[0])

259406

In [11]:
len(sess_betas[:,stream_idx[0]][0])

50244

In [12]:
for sidx, sid in enumerate([subjid]):
        
    mask = mask_3reps[sidx]
    sorted_betas = []

    #get all betas across all sessions
    for sess in range(1,int(max_session[sidx])+1):
        print(sess)

        if(sess < 10):
            idx = '0' + str(sess)
        else:
            idx = str(sess)

        raw_betas = h5py.File(local_data_dir+'freesurfer/subj'+sid+'/betas/'+ hemi +'.zscore_betas_session'+idx+'.hdf5','r')

        sess_betas = raw_betas['zscore_betas'][:][mask[(sess-1)*750:sess*750]]
        del raw_betas

        if(sess==1):
            betas_trimmed = sess_betas[:,stream_idx[0]]
        else:
            betas_trimmed = np.append(betas_trimmed,sess_betas[:,stream_idx[0]],axis=0)

        del sess_betas

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32


In [13]:
betas_trimmed.shape

(18702, 50244)

In [14]:
betas_by_repeat = []
sorted_betas = betas_trimmed[arr1inds[::-1]]

for r in range(n_repeats):
    betas_by_repeat.insert(r,sorted_betas[r::3])

In [16]:
del sorted_betas

In [17]:
del betas_trimmed

In [102]:
view = np.corrcoef(betas_by_repeat[0].T)

In [106]:
corr_struct_r1 = view

In [114]:
corr_struct_r1[0:100,].shape

(100, 39216)

In [115]:
#create dict for matlab
corr_struct = {}
corr_struct['matrix'] = corr_struct_r1[0:100,]
corr_struct['idx'] = stream_idx[0:100]

#save out
save_dir = '../../../local_data/processed'
scipy.io.savemat(save_dir + '/subj'+ sid+hemi+'_r1corrs.mat', corr_struct)

In [116]:
betas_by_repeat[0][:,0:10]

array([[ 0.52702347,  0.70148925,  0.67306408, ...,  0.93298385,
         1.07363744,  1.27608775],
       [-1.98816976, -2.06754873, -1.88364633, ..., -0.66691846,
        -1.26206734, -1.34072658],
       [ 0.49875258,  0.46666021,  1.58929746, ...,  0.66165018,
         0.20301394,  1.51180731],
       ...,
       [-0.70195495, -0.98641107, -1.47951335, ..., -0.82946236,
        -1.09007305, -0.81582154],
       [-0.26479123,  0.10511252, -0.07815462, ...,  0.93139166,
         0.84004851,  0.16542265],
       [-0.09343259,  0.51011874,  0.49939994, ..., -0.13318631,
         0.41293974, -0.18298244]])

In [19]:
corr_struct_r1r2 = fast_pearson(betas_by_repeat[0][:,0:10],betas_by_repeat[1][:,0:10])

In [20]:
corr_struct_r1r2.shape

(10, 10)

In [24]:
corr_struct_r1r2

array([[0.15029891, 0.14397391, 0.14819715, 0.13986979, 0.13127931,
        0.14785045, 0.14286334, 0.12826263, 0.10408985, 0.13048967],
       [0.14053085, 0.13575696, 0.13789429, 0.13048235, 0.12207548,
        0.13652155, 0.13138594, 0.11743012, 0.09674538, 0.12172708],
       [0.14896373, 0.14230913, 0.14718897, 0.13869753, 0.13056276,
        0.14785949, 0.14299308, 0.12885751, 0.09993433, 0.12691131],
       [0.13860143, 0.13285384, 0.13653666, 0.12852161, 0.12066293,
        0.13695469, 0.13177063, 0.11823559, 0.0898978 , 0.11522504],
       [0.13137341, 0.1258301 , 0.12934305, 0.12152518, 0.1141309 ,
        0.13011574, 0.12501516, 0.11240704, 0.08101907, 0.10547761],
       [0.15358181, 0.1461884 , 0.15245007, 0.14378683, 0.13596023,
        0.15390347, 0.14964389, 0.13577908, 0.10247162, 0.13061936],
       [0.14880044, 0.14151409, 0.14756428, 0.13877121, 0.13109962,
        0.1493308 , 0.14471415, 0.13121741, 0.09450693, 0.12226211],
       [0.13637677, 0.12957353, 0.1348970