In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import os
import sys
import time
import random
import json
import gc

import numpy as np
import pandas as pd
import torch
import torchio as tio
import h5py
from ipywidgets import interact
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm.notebook import tqdm
import nibabel as nib
from einops import rearrange
from scipy import ndimage


# Put the parent of the directory two levels above the working directory on
# sys.path (once) -- presumably so the `research` package imported below resolves.
dir2 = os.path.abspath('../..')
dir1 = os.path.dirname(dir2)
if dir1 not in sys.path:
    sys.path.append(dir1)
    
from research.data.natural_scenes import (
    NaturalScenesDataset,
    StimulusDataset,
    KeyDataset
)
from research.experiments.nsd_access import NSDAccess

C:\Users\Cefir\anaconda3\envs\Neurophysiological-Data-Decoding\lib\site-packages\numpy\.libs\libopenblas.EL2C6PLE4ZYW3ECEVIV3OXXGRN2NRFM2.gfortran-win_amd64.dll
C:\Users\Cefir\anaconda3\envs\Neurophysiological-Data-Decoding\lib\site-packages\numpy\.libs\libopenblas.GK7GX5KEQ4F6UYO3P26ULGBQYHGQO7J4.gfortran-win_amd64.dll
C:\Users\Cefir\anaconda3\envs\Neurophysiological-Data-Decoding\lib\site-packages\numpy\.libs\libopenblas.WCDJNK7YVMPZQ2ME2ZZHJJRJ3JIKNDB7.gfortran-win_amd64.dll
C:\Users\Cefir\anaconda3\envs\Neurophysiological-Data-Decoding\lib\site-packages\numpy\.libs\libopenblas.xwydx2ikjw2nmtwsfyngfuwkqu3lytcz.gfortran-win_amd64.dll


In [2]:
# Root folder of the local Natural Scenes Dataset copy (Windows-specific absolute path).
nsd_path = Path('D:\\Datasets\\NSD\\')
# Two project-level wrappers built from the same root folder.
# NOTE(review): what each constructor loads is defined in the `research` package
# and is not visible from this notebook -- confirm before relying on it.
nsd = NaturalScenesDataset(nsd_path)
nsd_access = NSDAccess(nsd_path)

In [3]:
@interact(i=(0, 73000-1), info_type=['captions', 'person_keypoints', 'instances'], show_img=True, show_annot=True)
def show(i, info_type, show_img, show_annot):
    """Show COCO information for one stimulus, driven by ipywidgets controls.

    Parameters
    ----------
    i : int
        Stimulus index chosen with the slider (0 .. 72999).
    info_type : str
        Annotation type chosen in the dropdown: 'captions',
        'person_keypoints' or 'instances'.
    show_img : bool
        Forwarded to ``read_image_coco_info`` as ``show_img``.
    show_annot : bool
        Forwarded to ``read_image_coco_info`` as ``show_annot``.
    """
    # Forward the dropdown selection. 'captions' used to be hard-coded here,
    # which made the info_type control a no-op.
    nsd_access.read_image_coco_info([i], info_type=info_type, show_img=show_img, show_annot=show_annot)

interactive(children=(IntSlider(value=36499, description='i', max=72999), Dropdown(description='info_type', op…

In [4]:
# Directory layout of the local NSD copy (Windows-specific absolute root).
dataset_path = Path('D:\\Datasets\\NSD\\')
derivatives_path = dataset_path.joinpath('derivatives')
betas_path = dataset_path.joinpath('nsddata_betas', 'ppdata')
ppdata_path = dataset_path.joinpath('nsddata', 'ppdata')

# Stimulus images: keep a handle on the 'imgBrick' dataset. The HDF5 file is
# deliberately left open because the dataset is indexed later in the notebook.
stimulu_path = dataset_path.joinpath('nsddata_stimuli', 'stimuli', 'nsd', 'nsd_stimuli.hdf5')
stimulus_images = h5py.File(stimulu_path, mode='r')['imgBrick']

# Stimulus information table; the first CSV column becomes the index.
simulus_information_path = dataset_path.joinpath('nsddata', 'experiments', 'nsd', 'nsd_stim_info_merged.csv')
simulus_information = pd.read_csv(simulus_information_path, index_col=0)

In [6]:
from PIL import Image

In [12]:
# Export every entry of the stimulus image brick as an individual PNG named
# '<index>.png' under derivatives/stimulus_images.
stimulus_png_dir = derivatives_path / 'stimulus_images'
# Image.save does not create missing folders, so make sure the target exists first.
stimulus_png_dir.mkdir(parents=True, exist_ok=True)
for i in tqdm(range(stimulus_images.shape[0])):
    img = Image.fromarray(stimulus_images[i])
    img.save(stimulus_png_dir / f'{i}.png')

  0%|          | 0/73000 [00:00<?, ?it/s]

In [4]:
# One (initially empty) record per subject, keyed 'subj01' ... 'subj08'.
subjects = {f'subj0{subject_number}': {} for subject_number in range(1, 9)}

for subject_name, subject_data in subjects.items():
    # Tab-separated behavioural responses table for this subject.
    responses_file_path = ppdata_path.joinpath(subject_name, 'behav', 'responses.tsv')
    subject_data['responses'] = pd.read_csv(responses_file_path, sep='\t')

    # The last 3 sessions are currently held out for the Algonauts challenge;
    # drop them for now.
    session_ids = subject_data['responses']['SESSION']
    held_out_mask = session_ids > (np.max(session_ids) - 3)
    subject_data['responses'] = subject_data['responses'].loc[~held_out_mask]

    subject_betas_path = derivatives_path.joinpath(
        'betas', subject_name, 'func1pt8mm', 'betas_fithrf_GLMdenoise_RR'
    )
    # Highest session id that remains after removing the held-out sessions.
    num_sessions = np.max(subject_data['responses']['SESSION'])

    # Keep a handle on the 'betas' dataset; the HDF5 file itself stays open.
    subject_data['betas'] = h5py.File(subject_betas_path.joinpath('betas_sessions.hdf5'), mode='r')['betas']

    #subject_data['brainmask'] = nib.load(ppdata_path / subject_name / 'func1pt8mm' / 'brainmask.nii.gz')
    #subject_data['t1_path'] = ppdata_path / subject_name / 'func1pt8mm' / 'T1_to_func1pt8mm.nii.gz'

In [36]:
from pycocotools.coco import COCO

# COCO 2017 folds and the annotation families loaded for each of them.
fold_names = ('train2017', 'val2017')
annotation_types = ('captions', 'instances', 'person_keypoints', 'stuff')

# Raw string: '\D' and '\C' are invalid escape sequences in a normal literal
# (they only work by accident and trigger a warning on newer Python versions).
coco_path = Path(r'X:\Datasets\COCO')
annotation_path = coco_path / 'annotations'

# Nested mapping {fold name: {annotation type: COCO index}}, one COCO object per
# '<annotation_type>_<fold_name>.json' file. Iterates over fold_names instead of
# repeating the tuple literal so the two cannot drift apart.
coco_folds = {
    fold_name: {
        annotation_type: COCO(annotation_path / f'{annotation_type}_{fold_name}.json')
        for annotation_type in annotation_types
    }
    for fold_name in fold_names
}

#for fold in coco_folds.values():
#    for coco in fold.values():
#        coco.loadCats(coco.getCatIds())

loading annotations into memory...
Done (t=1.63s)
creating index...
index created!
loading annotations into memory...
Done (t=22.02s)
creating index...
index created!
loading annotations into memory...
Done (t=10.75s)
creating index...
index created!
loading annotations into memory...
Done (t=19.45s)
creating index...
index created!
loading annotations into memory...
Done (t=0.14s)
creating index...
index created!
loading annotations into memory...
Done (t=0.78s)
creating index...
index created!
loading annotations into memory...
Done (t=0.39s)
creating index...
index created!
loading annotations into memory...
Done (t=0.72s)
creating index...
index created!


In [43]:
# Display the nested {fold name: {annotation type: COCO object}} mapping.
coco_folds

{'train2017': {'captions': <pycocotools.coco.COCO at 0x1aee4e36c10>,
  'instances': <pycocotools.coco.COCO at 0x1aee44a8700>,
  'person_keypoints': <pycocotools.coco.COCO at 0x1aee44a8f10>,
  'stuff': <pycocotools.coco.COCO at 0x1aee44a8340>},
 'val2017': {'captions': <pycocotools.coco.COCO at 0x1aee44a8ac0>,
  'instances': <pycocotools.coco.COCO at 0x1aee44a8d00>,
  'person_keypoints': <pycocotools.coco.COCO at 0x1aee44a86a0>,
  'stuff': <pycocotools.coco.COCO at 0x1aee44a8490>}}

In [37]:
# List the category records (supercategory / id / name) of the instances annotations.
# `coco` is not defined at notebook level, so this cell raised NameError on a
# fresh kernel; read from the loaded `coco_folds` mapping instead.
# NOTE(review): the train2017 fold is used here -- confirm that is the intended fold.
coco_folds['train2017']['instances'].dataset['categories']

[{'supercategory': 'person', 'id': 1, 'name': 'person'},
 {'supercategory': 'vehicle', 'id': 2, 'name': 'bicycle'},
 {'supercategory': 'vehicle', 'id': 3, 'name': 'car'},
 {'supercategory': 'vehicle', 'id': 4, 'name': 'motorcycle'},
 {'supercategory': 'vehicle', 'id': 5, 'name': 'airplane'},
 {'supercategory': 'vehicle', 'id': 6, 'name': 'bus'},
 {'supercategory': 'vehicle', 'id': 7, 'name': 'train'},
 {'supercategory': 'vehicle', 'id': 8, 'name': 'truck'},
 {'supercategory': 'vehicle', 'id': 9, 'name': 'boat'},
 {'supercategory': 'outdoor', 'id': 10, 'name': 'traffic light'},
 {'supercategory': 'outdoor', 'id': 11, 'name': 'fire hydrant'},
 {'supercategory': 'outdoor', 'id': 13, 'name': 'stop sign'},
 {'supercategory': 'outdoor', 'id': 14, 'name': 'parking meter'},
 {'supercategory': 'outdoor', 'id': 15, 'name': 'bench'},
 {'supercategory': 'animal', 'id': 16, 'name': 'bird'},
 {'supercategory': 'animal', 'id': 17, 'name': 'cat'},
 {'supercategory': 'animal', 'id': 18, 'name': 'dog'},

In [47]:
import skimage.io as io

def stimulus_viewer():
    """Build nested ipywidgets controls to browse a subject's trials and their stimuli.

    A dropdown selects the subject from ``nsd.subjects``; a slider then selects
    one row of that subject's responses table. For the selected row the viewer
    downloads the matching COCO image from its ``coco_url``, optionally overlays
    the 'instances' annotations, prints the image captions, and prints the
    response row, the stimulus info row and the COCO image record.

    Relies on the notebook-level names ``nsd``, ``coco_folds``, ``interact``,
    ``plt`` and ``io``. Returns nothing; all output is displayed as a side effect.
    """

    @interact(subject_name=list(nsd.subjects.keys()))
    def select_subject(subject_name):
        subject = nsd.subjects[subject_name]
        responses = subject['responses']

        @interact(response_id=(0, len(responses) - 1), show_annotations=False)
        def select_response(response_id, show_annotations):
            # The slider value is a 0-based row position, so index positionally.
            # Label-based .loc only works while the index happens to be 0..n-1.
            response = dict(responses.iloc[response_id])

            # '73KID' is shifted by one to look up the stimulus info row
            # (presumably the column is 1-based -- confirm against the NSD docs).
            nsd_stim_id = int(response['73KID']) - 1
            stim_info = dict(nsd.stimulus_info.loc[nsd_stim_id])

            coco_stim_id = stim_info['cocoId']

            # 'cocoSplit' selects which set of loaded COCO annotation indices to use.
            fold = coco_folds[stim_info['cocoSplit']]
            coco = fold['instances']
            image_info = coco.loadImgs([coco_stim_id])[0]

            plt.figure(figsize=(12, 12))
            # Fetches the image over the network from the URL stored in the COCO record.
            image = io.imread(image_info['coco_url'])
            plt.axis('off')
            plt.imshow(image)

            if show_annotations:
                # Drawn before plt.show() so the overlay ends up on the same figure.
                annotation_ids = coco.getAnnIds(imgIds=coco_stim_id)
                annotations = coco.loadAnns(annotation_ids)
                coco.showAnns(annotations)

            coco_captions = fold['captions']
            caption_ids = coco_captions.getAnnIds(imgIds=coco_stim_id)
            for caption in coco_captions.loadAnns(caption_ids):
                print(caption['caption'])

            plt.show()

            print('nsd response', response)
            print('nsd stim info', stim_info)
            print('coco image info', image_info)


stimulus_viewer()

interactive(children=(Dropdown(description='subject_name', options=('subj01', 'subj02', 'subj03', 'subj04', 's…

In [None]:
# Scratch inspection of the first subject's responses table.
subject = subjects['subj01']
responses = subject['responses']
# Lazily pairs each column name with the value in the second row (position 1).
# The zip object is single-use and is not consumed in this cell.
response = zip(responses.columns, responses.iloc[1])
# Display the row whose index label is 4 as a {column: value} dict.
# Note this is label-based (.loc), unlike the positional .iloc used on the line above.
dict(responses.loc[4])