In [11]:
import numpy as np
import nibabel as nib
import matplotlib.pyplot as plt
import pandas as pd
import h5py
from os.path import join as pjoin
import csv

### The workflow

1. Prepare the material:
    - stimulus: an HDF5 file (read with `h5py`) of shape (73000, 425, 425, 3).
    - ROI: `nii.gz` files, with their labels in `ctab` files, for two templates: `Kastner2015` and `prf-visualrois`.
    - beta values: 750 betas per scan session, acquired over 12 runs per session (37 sessions in total).
2. Find the label of each voxel, so that I know which region it belongs to.
3. For each trial, find the label of each voxel. Don't average at this stage.
4. Then save to an array indexed by image, and average over trials (each image has 3 trials).

### Reading the stimuli dataset
The size of the stimuli: (73000, 425, 425, 3), which corresponds to 73000 images of size 425x425 with 3 color channels (RGB).

In [6]:
# Root folder of the local NSD copy.
# NOTE(review): hard-coded absolute path; every other path in the notebook is derived from it.
basedir = '/mnt/c/Users/Wayne/Desktop/nsd'
stimuli_dir = pjoin(basedir, 'nsd_stimuli')
stimuli_file = pjoin(stimuli_dir, 'nsd_stimuli.hdf5')

# Open the stimuli HDF5 file read-only and report the shape of its first dataset.
with h5py.File(stimuli_file, 'r') as f:
    # The file is assumed to hold the images under its first top-level key.
    data_key = list(f.keys())[0]
    # `dataset` is a handle into the open file, not an in-memory copy;
    # NOTE(review): it is presumably unusable once the `with` block closes the file.
    dataset = f[data_key]
    print('dataset shape: ', dataset.shape)

dataset shape:  (73000, 425, 425, 3)


### Read NSD beta
There are 37 sessions in total. Each session has 750 trials.

The 3D voxel space of the brain is (83, 104, 81).

In [5]:
# Folder that holds one `betas_sessionNN.hdf5` file per scan session.
betas_dir = pjoin(basedir, 'nsd_betas')
# Build the path from betas_dir. The previous version joined with stimuli_dir,
# which left betas_dir unused and pointed at the stimuli folder.
betas_file = pjoin(betas_dir, 'betas_session01.hdf5')

# Open the session file read-only. The handle `f` is intentionally left open:
# `data` is a lazy reference into the file and stays readable only while `f` is open.
# Call f.close() once `data` is no longer needed.
f = h5py.File(betas_file, 'r')
# The betas are assumed to live under the first top-level key of the file.
data = f[list(f.keys())[0]]
print(data.shape)

(750, 83, 104, 81)


### Locate the labels of the ROIs

In [22]:
def separate_tab(file_path):
    """
    Build an ``{index: name}`` dictionary from a label table (``.ctab``) text file.

    Each non-empty line is expected to start with a label index followed by a
    label name; any further columns (e.g. colour values) are ignored. Columns
    may be separated by spaces, tabs, or a mix of both.

    Parameters
    ----------
    file_path : str
        Path of the text file to read.

    Returns
    -------
    dict
        Maps the first token of each line (as a string) to the second token
        (as a string). Lines with fewer than two tokens, including blank
        lines, are skipped.
    """
    label_dict = {}
    # newline='' is what the csv module recommends for files handed to csv.reader.
    with open(file_path, newline='') as ctab:
        reader = csv.reader(ctab, delimiter='\t')
        for row in reader:
            # Flatten every tab-delimited cell into whitespace-separated tokens.
            # Space-separated rows arrive as one cell, tab-separated rows as many;
            # both end up as [index, name, ...].
            tokens = [token for cell in row for token in cell.split()]
            if len(tokens) < 2:
                # Blank or malformed line: nothing to record.
                continue
            label_dict[tokens[0]] = tokens[1]
    return label_dict

In [38]:
# Folder holding the ROI volumes (.nii.gz) and their label tables (.ctab).
roi_dir = pjoin(basedir, 'nsd_roi')

# pRF-based visual ROIs: one volume per hemisphere
lh_prf_visual_file = pjoin(roi_dir, 'lh.prf-visualrois.nii.gz')
rh_prf_visual_file = pjoin(roi_dir, 'rh.prf-visualrois.nii.gz')

# Label table (ctab file) for the pRF ROIs
prf_visual_labels_file = pjoin(roi_dir, 'prf-visualrois.mgz.ctab')

# {index: name} mapping for the pRF ROIs.
# NOTE(review): the variable name is misspelled ("lables"); it is kept as-is here
# because other cells may refer to it.
prf_visualrois_lables = separate_tab(prf_visual_labels_file)

# Kastner2015 atlas: a single volume file
kastner_file = pjoin(roi_dir, 'Kastner2015.nii.gz')

# Label table for the Kastner2015 atlas, parsed into an {index: name} mapping
kastner_labels_file = pjoin(roi_dir, 'Kastner2015.mgz.ctab')
kastner_labels = separate_tab(kastner_labels_file)
print(kastner_labels)

{'0': 'Unknown', '1': 'V1v', '2': 'V1d', '3': 'V2v', '4': 'V2d', '5': 'V3v', '6': 'V3d', '7': 'hV4', '8': 'VO1', '9': 'VO2', '10': 'PHC1', '11': 'PHC2', '12': 'TO2', '13': 'TO1', '14': 'LO2', '15': 'LO1', '16': 'V3B', '17': 'V3A', '18': 'IPS0', '19': 'IPS1', '20': 'IPS2', '21': 'IPS3', '22': 'IPS4', '23': 'IPS5', '24': 'SPL1', '25': 'FEF'}


In [None]:
# Despite the `_file` suffix this is a directory path.
# NOTE(review): presumably the folder passed to concat_all_designs, which joins
# per-run .tsv file names onto its argument — confirm at the call site.
design_file = pjoin(basedir, 'nsd_design')

In [40]:
def count_roi_voxels_num(roi, label):
    """Return how many elements of `roi` are equal to `label`."""
    membership = roi == label
    # np.nonzero yields one index array per axis; each has one entry per match.
    hit_indices = np.nonzero(membership)
    return len(hit_indices[0])

def generate_list_voxel_3d(roi, label):
    """
    List the 3-D index triplets of every element of `roi` equal to `label`.

    Returns a list of ``[i, j, k]`` lists (indices along the first three axes
    of `roi`), in the order produced by ``np.where``.
    """
    hits = np.where(roi == label)
    n_hits = len(hits[0])
    # One [axis0, axis1, axis2] triplet per matching element.
    return [[hits[0][k], hits[1][k], hits[2][k]] for k in range(n_hits)]

def decompose_3d_to_voxel_id(combined_axis_list, label):
    """
    Turn a list of 3-D index triplets into a table with one row per voxel.

    Parameters
    ----------
    combined_axis_list : sequence of sequences
        Each item provides at least three entries; the first three are stored
        in the ``x``, ``y`` and ``z`` columns, in that order.
    label : any
        Value written to the ``label`` column of every row.

    Returns
    -------
    pandas.DataFrame
        Columns ``['x', 'y', 'z', 'voxel_id', 'label']``; ``voxel_id`` is the
        position of the triplet in `combined_axis_list` (0, 1, 2, ...), which
        is also the row index.
    """
    # Build all rows first and construct the frame once. Appending with
    # `.loc[i] = ...` reallocates the frame on every row, which is quadratic
    # and leaves the columns with object dtype.
    records = [
        (coords[0], coords[1], coords[2], voxel_id, label)
        for voxel_id, coords in enumerate(combined_axis_list)
    ]
    return pd.DataFrame(records, columns=['x', 'y', 'z', 'voxel_id', 'label'])

def concat_all_designs(design_file, n_sessions=37, n_runs=12):
    """
    Read every per-run design file and concatenate the trial entries.

    The previous version read each file but discarded the contents and
    returned None; this version collects and returns them.

    Parameters
    ----------
    design_file : str
        Directory containing the design ``.tsv`` files (a directory, despite
        the parameter name).
    n_sessions : int, optional
        Number of sessions to read (default 37).
    n_runs : int, optional
        Number of runs per session (default 12).

    Returns
    -------
    numpy.ndarray
        1-D array with the non-zero entries of the first column of every
        design file, ordered by session, then run, then row.

    Notes
    -----
    NOTE(review): this assumes each design file has one value per row, with 0
    meaning "no stimulus" and a non-zero value identifying the stimulus of a
    trial — TODO confirm against the dataset documentation. Under that
    assumption 37 sessions should yield 750 * 37 entries, matching the beta
    trials.
    """
    import os  # local import so this cell does not depend on an edited import cell

    per_run_trials = []
    for session in range(1, n_sessions + 1):
        for run in range(1, n_runs + 1):
            # Prefer zero-padded names (as used for 'betas_session01.hdf5'),
            # but keep accepting the unpadded names the original code built.
            _filename = pjoin(
                design_file, 'design_session%02d_run%02d.tsv' % (session, run))
            if not os.path.exists(_filename):
                _filename = pjoin(
                    design_file,
                    'design_session' + str(session) + '_run' + str(run) + '.tsv')
            ## read tsv data
            _data = pd.read_csv(_filename, sep='\t', header=None).values
            first_column = _data[:, 0]
            per_run_trials.append(first_column[first_column != 0])

    if not per_run_trials:
        return np.zeros(0)
    return np.concatenate(per_run_trials)
    


def concat_all_betas(betas_file, n_sessions=37, trials_per_session=750,
                     volume_shape=(83, 104, 81), dtype=np.float64):
    """
    Load the betas of every session and stack them along the trial axis.

    Parameters
    ----------
    betas_file : str
        Directory containing the per-session HDF5 files (a directory, despite
        the parameter name).
    n_sessions : int, optional
        Number of sessions to load (default 37).
    trials_per_session : int, optional
        Number of trials stored in each session file (default 750).
    volume_shape : tuple of int, optional
        Shape of one beta volume (default ``(83, 104, 81)``).
    dtype : numpy dtype, optional
        dtype of the returned array (default ``np.float64``, as before).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(trials_per_session * n_sessions, *volume_shape)``;
        session ``s`` (1-based) fills rows ``(s-1)*trials_per_session`` up to
        ``s*trials_per_session``.

    Notes
    -----
    With the defaults the result has 27750 * 83 * 104 * 81 elements, i.e.
    roughly 155 GB as float64. Pass a smaller `dtype` or fewer sessions if
    that does not fit in memory.
    """
    import os  # local import so this cell does not depend on an edited import cell

    # Pre-allocate the full (trials, volume...) array
    _4d_array = np.zeros(
        (trials_per_session * n_sessions,) + tuple(volume_shape), dtype=dtype)

    for i in range(n_sessions):
        session = i + 1
        # Session files are zero-padded on disk ('betas_session01.hdf5'); the
        # original built 'betas_session1.hdf5'. Fall back to the unpadded name
        # only if that is what actually exists.
        _filename = pjoin(betas_file, 'betas_session%02d.hdf5' % session)
        if not os.path.exists(_filename):
            unpadded = pjoin(betas_file, 'betas_session' + str(session) + '.hdf5')
            if os.path.exists(unpadded):
                _filename = unpadded

        # Use a context manager so each HDF5 handle is closed after copying
        # (the original left all of them open).
        with h5py.File(_filename, 'r') as f:
            _data = f[list(f.keys())[0]]
            start = i * trials_per_session
            stop = start + trials_per_session
            # `[...]` reads the dataset into memory before the file is closed.
            _4d_array[start:stop] = _data[...]
    return _4d_array

    

def get_betas_to_3d(df_decompose, stimuli_design_list, stimuli_beta, stimuli_num = 730000):
    """
    Average the betas of every ROI voxel over the repeated trials of each stimulus.

    Parameters
    ----------
    df_decompose : pandas.DataFrame
        Voxel table with columns ``x``, ``y``, ``z`` and ``voxel_id``. Voxel ids
        ``0 .. len(df_decompose) - 1`` must all be present; if an id occurs
        more than once, its first row is used.
    stimuli_design_list : array-like
        One entry per trial giving the stimulus shown on that trial; aligned
        with the first axis of `stimuli_beta`.
    stimuli_beta : array-like, 4-D
        Betas indexed as ``[trial, z, y, x]``, i.e. the spatial axes are in
        the reverse order of the ``x``/``y``/``z`` columns of `df_decompose`.
    stimuli_num : int, optional
        Unused; kept only so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per unique value of `stimuli_design_list` (the row label) and
        one column per voxel id; each cell is the mean beta of that voxel over
        all trials of that stimulus.

    Raises
    ------
    IndexError
        If a voxel id in ``0 .. len(df_decompose) - 1`` is missing.
    """
    n_voxels = len(df_decompose.voxel_id)
    voxel_id_list = list(range(n_voxels))

    # Resolve the coordinates of every voxel id once, instead of filtering the
    # DataFrame for every (stimulus, voxel) pair.
    first_rows = (
        df_decompose
        .drop_duplicates(subset='voxel_id', keep='first')
        .set_index('voxel_id')
    )
    missing = [v for v in voxel_id_list if v not in first_rows.index]
    if missing:
        raise IndexError('voxel_id values missing from df_decompose: %s' % missing[:10])
    coords = first_rows.loc[voxel_id_list, ['x', 'y', 'z']].to_numpy().astype(np.intp)
    xs, ys, zs = coords[:, 0], coords[:, 1], coords[:, 2]

    # Accept plain lists as well as arrays for the design.
    design = np.asarray(stimuli_design_list)
    unique_session_list = np.unique(design)

    mean_rows = []
    for stimulus in unique_session_list:
        trial_idx = np.where(design == stimulus)[0]
        # Pull the trials of this stimulus first (this also works for on-disk
        # datasets that only support one fancy index), then pick all ROI
        # voxels at once.
        trial_volumes = np.asarray(stimuli_beta[trial_idx])
        mean_rows.append(trial_volumes[:, zs, ys, xs].mean(axis=0))

    values = np.vstack(mean_rows) if mean_rows else np.empty((0, n_voxels))
    # Build the table in one go rather than appending row by row.
    return pd.DataFrame(values, index=unique_session_list, columns=voxel_id_list)

In [39]:
# Load the three ROI volumes with nibabel: the left- and right-hemisphere pRF
# visual ROIs and the Kastner2015 atlas.
# NOTE(review): the captured output below this cell (a shape and a count) is not
# produced by these three lines, so the code that printed it has presumably been removed.
lh_prf_img = nib.load(lh_prf_visual_file)
rh_prf_img = nib.load(rh_prf_visual_file)
kastner_img = nib.load(kastner_file)



(81, 104, 83)
351
