From [naturalscenesdataset.org](https://naturalscenesdataset.org):
```text
The Natural Scenes Dataset (NSD) is a large-scale fMRI dataset conducted at ultra-high-field (7T) strength at the Center of Magnetic Resonance Research (CMRR) at the University of Minnesota. The dataset consists of whole-brain, high-resolution (1.8-mm isotropic, 1.6-s sampling rate) fMRI measurements of 8 healthy adult subjects while they viewed thousands of color natural scenes over the course of 30–40 scan sessions. While viewing these images, subjects were engaged in a continuous recognition task in which they reported whether they had seen each given image at any point in the experiment. These data constitute a massive benchmark dataset for computational models of visual representation and cognition, and can support a wide range of scientific inquiry.
```


In [29]:
from cloudpathlib import S3Path, S3Client
from pathlib import Path

# Set up our cache path:
cache_path = Path('/tmp/cache')
if not cache_path.exists():
    cache_path.mkdir()

# Create the root S3Path for the NSD:
nsd_base_path = S3Path(
    's3://natural-scenes-dataset/',
    client=S3Client(
        no_sign_request=True,
        local_cache_dir=cache_path))

In [30]:
import os
import os.path as op
import glob
import nibabel as nib
import numpy as np
import pandas as pd
import h5py

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from nilearn import plotting

import urllib.request, zipfile
try:
    from pycocotools.coco import COCO
except ImportError as e:
    !pip install pycocotools
    from pycocotools.coco import COCO


#from nsd_access import NSDAccess

%matplotlib inline

Collecting pycocotools
  Obtaining dependency information for pycocotools from https://files.pythonhosted.org/packages/ba/64/0451cf41a00fd5ac4501de4ea0e395b7d909e09d665e56890b5d3809ae26/pycocotools-2.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Using cached pycocotools-2.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB)
Using cached pycocotools-2.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (426 kB)
Installing collected packages: pycocotools
Successfully installed pycocotools-2.0.7


In [31]:
from utils import ls, crawl

nsd_pppath = nsd_base_path / 'nsddata_betas' / 'ppdata'

ls(nsd_pppath / 'subj01' / 'fsaverage' / 'betas_fithrf_GLMdenoise_RR')

[S3Path('s3://natural-scenes-dataset/nsddata_betas/ppdata/subj01/fsaverage/betas_fithrf_GLMdenoise_RR/lh.betas_session01.mgh'),
 S3Path('s3://natural-scenes-dataset/nsddata_betas/ppdata/subj01/fsaverage/betas_fithrf_GLMdenoise_RR/lh.betas_session02.mgh'),
 S3Path('s3://natural-scenes-dataset/nsddata_betas/ppdata/subj01/fsaverage/betas_fithrf_GLMdenoise_RR/lh.betas_session03.mgh'),
 S3Path('s3://natural-scenes-dataset/nsddata_betas/ppdata/subj01/fsaverage/betas_fithrf_GLMdenoise_RR/lh.betas_session04.mgh'),
 S3Path('s3://natural-scenes-dataset/nsddata_betas/ppdata/subj01/fsaverage/betas_fithrf_GLMdenoise_RR/lh.betas_session05.mgh'),
 S3Path('s3://natural-scenes-dataset/nsddata_betas/ppdata/subj01/fsaverage/betas_fithrf_GLMdenoise_RR/lh.betas_session06.mgh'),
 S3Path('s3://natural-scenes-dataset/nsddata_betas/ppdata/subj01/fsaverage/betas_fithrf_GLMdenoise_RR/lh.betas_session07.mgh'),
 S3Path('s3://natural-scenes-dataset/nsddata_betas/ppdata/subj01/fsaverage/betas_fithrf_GLMdenoise_RR/lh

### Loading data in volume space (1.8mm isotropic resolution)

In [32]:
vimg_filename = nsd_pppath / 'subj01' / 'func1pt8mm' / 'betas_fithrf_GLMdenoise_RR' / 'betas_session01.nii.gz'
vimg = nib.load(vimg_filename.fspath)
vimg = vimg.dataobj

# reshape to vector of nvoxels x ntimepoints
vimg = vimg.reshape([np.prod(vimg.shape[0:-1]),vimg.shape[-1]])

vimg.shape

[699192, 750]

### Loading data in surface space (fsaverage)

In [33]:
simg_filename = nsd_pppath / 'subj01' / 'fsaverage' / 'betas_fithrf_GLMdenoise_RR' / 'lh.betas_session01.mgh'
simg = nib.load(simg_filename.fspath)
simg = simg.dataobj

# reshape to vector of nvoxels x ntimepoints
simg = simg.reshape([np.prod(simg.shape[0:-1]),simg.shape[-1]])

simg.shape

[163842, 750]

### Loading in stimuli data

In [34]:
ls(nsd_base_path)
nsd_stimuli = nsd_base_path / 'nsddata_stimuli' / 'stimuli' / 'nsd'
ls(nsd_stimuli)

[S3Path('s3://natural-scenes-dataset/nsddata_stimuli/stimuli/nsd/nsd_stimuli.hdf5')]

In [49]:
f1 = h5py.File((nsd_stimuli / 'nsd_stimuli.hdf5').fspath, 'r+')
print("Keys: %s" % f1.keys())
dset = f1['imgBrick']
#f1.close

Keys: <KeysViewHDF5 ['imgBrick']>


In [46]:
dset

<HDF5 file "nsd_stimuli.hdf5" (mode r+)>

In [11]:
dset[0].shape

(425, 425, 3)

In [None]:
#73000 images, 425x425 shape and 3 RGB color dimensions

### Load in stimulus set for subj01

In [131]:
stimdata_s1 = nsd_base_path / 'nsddata' / 'ppdata' / 'subj01' / 'behav' / 'responses.tsv'
#ls(stimdata_s1)
expdata = pd.read_table(stimdata_s1)
#expdata.iloc[0:64]

In [132]:
sub_df = expdata[["SUBJECT","SESSION","RUN","TRIAL","73KID"]]
sub_df

Unnamed: 0,SUBJECT,SESSION,RUN,TRIAL,73KID
0,1,1,1,1,46003
1,1,1,1,2,61883
2,1,1,1,3,829
3,1,1,1,4,67574
4,1,1,1,5,16021
...,...,...,...,...,...
29995,1,40,12,58,13774
29996,1,40,12,59,66768
29997,1,40,12,60,53168
29998,1,40,12,61,1944


In [84]:
[min(sub_df['73KID']), max(sub_df['73KID'])]

[14, 73000]

### Read in the annotated labels

In [133]:
#Find cocoIDs for each stimulus
ls(nsd_base_path)
nsd_cocoIDs = nsd_base_path / 'nsddata' / 'experiments' / 'nsd' / 'nsd_stim_info_merged.csv'

In [134]:
cocoID_DF = pd.read_csv(nsd_cocoIDs)
cocoID = cocoID_DF['cocoId']
cocoID

0        532481
1        245764
2        385029
3        311303
4        393226
          ...  
72995    518071
72996    255930
72997    255934
72998    518080
72999    518083
Name: cocoId, Length: 73000, dtype: int64

In [None]:
#add respective cocoID to the dataframe above to match the 73KID

In [135]:
sub_cocoID = []
for trial in sub_df["73KID"]:
    sub_cocoID.append(cocoID.iloc[trial-1])

In [139]:
sub_cocoid = pd.DataFrame({"COCOID": sub_cocoID})
sub_DF = pd.merge(sub_df, sub_cocoid, left_index=True, right_index=True)
sub_DF

Unnamed: 0,SUBJECT,SESSION,RUN,TRIAL,73KID,COCOID
0,1,1,1,1,46003,412922
1,1,1,1,2,61883,474858
2,1,1,1,3,829,320696
3,1,1,1,4,67574,234676
4,1,1,1,5,16021,301595
...,...,...,...,...,...,...
29995,1,40,12,58,13774,32606
29996,1,40,12,59,66768,388123
29997,1,40,12,60,53168,179070
29998,1,40,12,61,1944,13597


In [None]:
#read the labels of the cocoIDs and add column to the data frame

In [6]:
import urllib.request
import zipfile
import json
urllib.request.urlretrieve('http://images.cocodataset.org/annotations/annotations_trainval2017.zip', 'coco_annotations_2017.zip')

with open('./annotations/instances_train2017.json') as json_data:
  file_contents = json_data.read()

data = json.loads(file_contents)
data.keys()

In [25]:
annotations = data['annotations']
categories = data['categories']

In [165]:
ids_list = [data['annotations'][i]['id'] for i in range(len(data['annotations']))]
coco_image_ids_list = [data['annotations'][i]['image_id'] for i in range(len(data['annotations']))]
#coco_image_ids_list

In [180]:
image_id_to_text = {data['categories'][i]['id'] : data['categories'][i]['name'] for i in range(80)}
cat_id_to_text = {data['categories'][i]['id'] : data['categories'][i]['supercategory'] for i in range(80)}
cat_id_to_text

{1: 'person',
 2: 'vehicle',
 3: 'vehicle',
 4: 'vehicle',
 5: 'vehicle',
 6: 'vehicle',
 7: 'vehicle',
 8: 'vehicle',
 9: 'vehicle',
 10: 'outdoor',
 11: 'outdoor',
 13: 'outdoor',
 14: 'outdoor',
 15: 'outdoor',
 16: 'animal',
 17: 'animal',
 18: 'animal',
 19: 'animal',
 20: 'animal',
 21: 'animal',
 22: 'animal',
 23: 'animal',
 24: 'animal',
 25: 'animal',
 27: 'accessory',
 28: 'accessory',
 31: 'accessory',
 32: 'accessory',
 33: 'accessory',
 34: 'sports',
 35: 'sports',
 36: 'sports',
 37: 'sports',
 38: 'sports',
 39: 'sports',
 40: 'sports',
 41: 'sports',
 42: 'sports',
 43: 'sports',
 44: 'kitchen',
 46: 'kitchen',
 47: 'kitchen',
 48: 'kitchen',
 49: 'kitchen',
 50: 'kitchen',
 51: 'kitchen',
 52: 'food',
 53: 'food',
 54: 'food',
 55: 'food',
 56: 'food',
 57: 'food',
 58: 'food',
 59: 'food',
 60: 'food',
 61: 'food',
 62: 'furniture',
 63: 'furniture',
 64: 'furniture',
 65: 'furniture',
 67: 'furniture',
 70: 'furniture',
 72: 'electronic',
 73: 'electronic',
 74: 'el

In [182]:
image_labels_dict_numeric = {image_id : [] for image_id in coco_image_ids_list}
image_labels_dict_text = {image_id : [] for image_id in coco_image_ids_list}
cat_labels_dict_numeric = {supercategory : [] for supercategory in coco_image_ids_list}
cat_labels_dict_text = {supercategory : [] for supercategory in coco_image_ids_list}

for item in data['annotations']:
    image_labels_dict_numeric[item['image_id']].append(item['category_id'])
    image_labels_dict_text[item['image_id']].append(image_id_to_text[item['category_id']])
    cat_labels_dict_numeric[item['image_id']].append(item['category_id'])
    cat_labels_dict_text[item['image_id']].append(cat_id_to_text[item['category_id']])
    
#cat_labels_dict_text

In [None]:
for dictionary in [image_labels_dict_numeric, image_labels_dict_text]:
    for key,value in dictionary.items():
        value = list(set(value))
        value.sort()
        dictionary[key] = value

In [184]:
for dictionary in [cat_labels_dict_numeric, cat_labels_dict_text]:
    for key,value in dictionary.items():
        value = list(set(value))
        value.sort()
        dictionary[key] = value
        
cat_labels_dict_text

{558840: ['food', 'furniture', 'kitchen', 'person'],
 200365: ['food', 'furniture', 'kitchen', 'vehicle'],
 495357: ['accessory', 'animal', 'person', 'vehicle'],
 116061: ['accessory', 'animal', 'kitchen', 'person', 'vehicle'],
 16164: ['animal', 'furniture'],
 205350: ['animal', 'appliance'],
 74: ['animal', 'person', 'vehicle'],
 212545: ['animal', 'person'],
 514915: ['animal', 'kitchen'],
 154589: ['animal', 'appliance', 'food', 'indoor', 'kitchen', 'person'],
 471175: ['animal', 'person'],
 225919: ['accessory', 'animal', 'person', 'vehicle'],
 400728: ['animal', 'furniture', 'vehicle'],
 194306: ['animal', 'vehicle'],
 383780: ['animal', 'furniture'],
 580255: ['accessory', 'animal'],
 370210: ['animal', 'outdoor', 'person', 'vehicle'],
 75283: ['animal', 'person', 'vehicle'],
 325969: ['animal', 'person', 'vehicle'],
 251716: ['animal', 'person', 'vehicle'],
 13882: ['animal', 'furniture'],
 185156: ['animal', 'person', 'vehicle'],
 176697: ['animal'],
 376608: ['animal'],
 1789