In [7]:
import numpy as np
import nibabel as nib
import matplotlib.pyplot as plt
import pandas as pd
import h5py
from os.path import join as pjoin
import csv
import itertools
import argparse
import time
from tqdm import tqdm
import multiprocessing

### Goal: find the pictures that are common to all 8 subjects

Workflow:
1. Read the design-session TSV files for each subject.
2. Read the shared-1000 file to learn which 1000 pictures should be used.
3. Keep the shared-1000 pictures that also appear in every subject's design files.

In [8]:
class NSD_Processor:
    """Gather the positive entries of one subject's per-run design TSV files.

    The notebook treats the returned values as the image IDs shown to the
    subject (zeros presumably mark trials without a stimulus -- confirm
    against the dataset documentation).

    Parameters
    ----------
    subject : str
        Name of the subject directory under ``basedir``.
        NOTE(review): the default ``"sub_01"`` does not follow the
        ``"subjNN"`` pattern the notebook passes in -- confirm it is intended.
    basedir : str
        Root directory that contains one folder per subject.
    """

    def __init__(self, subject="sub_01", basedir='/Volumes/side_project/nsd'):
        self.basedir = basedir
        self.subject = subject
        # Directory holding the design_sessionXX_runYY.tsv files.
        self.design_file = pjoin(self.basedir, f'{self.subject}/nsd_design')
        self.session_num = 37
        # Not read by any method of this class; kept for external users.
        self.run_num_less = 12
        # Upper bound on runs per session; sessions with fewer runs simply
        # have no file for the higher run numbers.
        self.run_num_more = 14
        # Not read by any method of this class; kept for external users.
        self.visualrois_num = 8

    def concat_all_designs(self, design_file=None):
        """Concatenate the positive values of every design file found.

        Files are visited in session order, then run order. Run files that
        do not exist are skipped; any other read or parse error propagates
        so that corrupted data is not silently dropped.

        Parameters
        ----------
        design_file : str, optional
            Directory containing the design TSV files. Defaults to
            ``self.design_file``.

        Returns
        -------
        numpy.ndarray
            1-D array of all values greater than zero, in file order.
            Empty when no design file is found.
        """
        if design_file is None:
            design_file = self.design_file

        per_run_values = []
        for session in range(1, self.session_num + 1):
            for run in range(1, self.run_num_more + 1):
                filename = pjoin(
                    design_file,
                    f'design_session{session:02d}_run{run:02d}.tsv',
                )
                try:
                    data = pd.read_csv(filename, sep='\t', header=None).values
                except FileNotFoundError:
                    # Expected: not every session has run_num_more runs.
                    continue
                # Boolean-mask indexing flattens the result to 1-D.
                per_run_values.append(data[data > 0])

        if not per_run_values:
            # np.concatenate rejects an empty sequence.
            return np.array([])
        return np.concatenate(per_run_values)

In [9]:
# One processor per subject, built in subject order.
subj1, subj2, subj3, subj4, subj5, subj6, subj7, subj8 = (
    NSD_Processor(subject=subject_id)
    for subject_id in (
        "subj01", "subj02", "subj03", "subj04",
        "subj05", "subj06", "subj07", "subj08",
    )
)

# Collect each subject's design-file values, again in subject order.
(
    subj1_imgs, subj2_imgs, subj3_imgs, subj4_imgs,
    subj5_imgs, subj6_imgs, subj7_imgs, subj8_imgs,
) = (
    processor.concat_all_designs(processor.design_file)
    for processor in (subj1, subj2, subj3, subj4, subj5, subj6, subj7, subj8)
)

In [10]:
# Read the header-less, tab-separated shared-1000 file into a 2-D NumPy array
# (one row per line of the file).
shared1000_path = '/Users/yilewang/Downloads/shared1000.tsv'
shared1000_table = pd.read_csv(shared1000_path, sep='\t', header=None)
shared1000 = shared1000_table.values

In [19]:
# Keep the shared-1000 entries that occur in every subject's image array,
# preserving the order in which they are listed in shared1000.
# Assumes shared1000 has a single column (one ID per row) -- the row-wise
# int() conversion used previously required that as well.
shared_ids = shared1000.ravel()

# One vectorised membership test per subject, AND-ed together. This replaces
# eight full-array scans per ID and avoids calling int() on a 1-element array,
# which NumPy has deprecated.
present_in_all = np.logical_and.reduce([
    np.isin(shared_ids, subject_imgs)
    for subject_imgs in (
        subj1_imgs, subj2_imgs, subj3_imgs, subj4_imgs,
        subj5_imgs, subj6_imgs, subj7_imgs, subj8_imgs,
    )
])

# Plain Python ints, as before, so the saved array has an integer dtype.
a = [int(img_id) for img_id in shared_ids[present_in_all]]

# save a to npy file
np.save('common_imgs.npy', a)

In [20]:
# Reload the array written by np.save above and print it in full.
common_imgs_path = 'common_imgs.npy'
a = np.load(common_imgs_path)
print(a)

[ 2951  2991  3050  3078  3147  3158  3165  3172  3182  3387  3435  3450
  3490  3627  3683  3688  3730  3810  3843  3848  3857  3914  3952  4052
  4059  4130  4157  4250  4326  4424  4437  4613  4668  4691  4769  4787
  4836  4870  4893  4931  5035  5107  5205  5302  5339  5460  5543  5584
  5603  5715  5879  6200  6223  6432  6445  6490  6515  6522  6559  6714
  6802  7008  7040  7121  7208  7337  7367  7410  7419  7481  7655  7660
  7841  7860  7945  7949  7955  8007  8110  8205  8226  8263  8275  8388
  8395  8416  8436  8466  8510  8632  8647  8808  8844  8926  8934  9028
  9049  9148  9231  9435  9463  9723  9805  9848  9866  9918  9979 10007
 10047 10065 10106 10108 10245 10472 10508 10601 10611 10908 11160 11334
 11488 11522 11567 11618 11636 11690 11726 11797 11838 11845 11933 11943
 11953 12076 12309 12488 12635 12686 12797 12799 12923 12938 13139 13231
 13313 13315 13614 13654 13663 13721 13847 14111 14122 14166 14180 14444
 14529 14568 14595 14611 14645 14794 14809 14821 14