# THINGS-fMRI usage notes (Modified Version)

### THINGS-fMRI1 b-value extraction notebook

Parts of the code are adapted from the THINGS-data repository: [link](https://github.com/ViCCo-Group/THINGS-data/blob/main/MRI/notebooks/fmri_usage.ipynb)

For a detailed description of the data and the procedures that generated it, see [the THINGS-data preprint](https://doi.org/10.1101/2022.07.22.501123).

In [1]:
from os.path import join as pjoin
import glob
import numpy as np
import pandas as pd
# from nilearn.masking import apply_mask, unmasks
# from nilearn.plotting import plot_epi, plot_stat_map
# from nilearn.image import load_img, index_img, iter_img
# import matplotlib.pyplot as plt
# import cortex
import os
import shutil

## define the path

In [2]:
# Assumes you've downloaded the THINGS-fMRI data to this directory
# (machine-specific absolute path; adjust before running elsewhere)
basedir = '/mnt/c/Users/Wayne/Desktop/FCAnet/'
# Folder from which the per-subject ResponseData.h5, VoxelMetadata.csv and
# StimulusMetadata.csv files are read by the functions below
betas_csv_dir = pjoin(basedir, 'betas_csv')

## Single trial responses

The single trial responses are arguably the easiest way to analyze the THINGS-fMRI data. They contain the magnitude of the fMRI response to each stimulus in each voxel as a single number. The single trial responses are provided in two formats: a) in table format, b) in volumetric format.

### Table format

Besides the fMRI response data, the table format contains metadata about each voxel (such as noise ceilings, pRF parameters, regions of interest) and about the stimulus (such as image file name, trial type, run and session). 

## define a function to get betas

1. `get_ROI_voxel_id` returns the voxel ids that belong to a given ROI.
2. `get_betas` loads a subject's single-trial betas and averages them across the voxels of each ROI, so it returns a dataframe with one row per ROI (V1, V2, hV4, IT) and one column per trial.

In [3]:
def get_ROI_voxel_id(voxdata, region):
    """Return the ids of all voxels flagged as members of ``region``.

    Parameters
    ----------
    voxdata : pandas.DataFrame
        Voxel metadata table with a ``voxel_id`` column and one mask column
        per ROI.
    region : str
        Name of the ROI mask column; a voxel is a member when its value is 1.

    Returns
    -------
    numpy.ndarray
        The ``voxel_id`` values of the member voxels, in table order.
    """
    in_region = voxdata[region] == 1
    member_ids = voxdata.loc[in_region, "voxel_id"]
    return np.array(member_ids.values)

def get_betas(betas_csv_dir, sub):
    """Load one subject's single-trial betas and average them within each ROI.

    Parameters
    ----------
    betas_csv_dir : str
        Directory containing ``sub-{sub}_ResponseData.h5`` and
        ``sub-{sub}_VoxelMetadata.csv``.
    sub : str
        Subject label used in the file names (e.g. ``'01'``).

    Returns
    -------
    pandas.DataFrame
        One row per ROI (``V1``, ``V2``, ``hV4``, ``IT``) and one column per
        trial, holding the mean beta over the ROI's voxels. Columns are
        numbered 0..n-1 in the order of the trial columns of the response
        table.
    """
    data_file = pjoin(betas_csv_dir, f'sub-{sub}_ResponseData.h5')
    responses = pd.read_hdf(data_file)  # this may take a minute
    # find voxel ids for each ROI
    vox_f = pjoin(betas_csv_dir, f'sub-{sub}_VoxelMetadata.csv')
    voxdata = pd.read_csv(vox_f)

    roi_voxel_ids = {
        "V1": get_ROI_voxel_id(voxdata, "V1"),
        "V2": get_ROI_voxel_id(voxdata, "V2"),
        "hV4": get_ROI_voxel_id(voxdata, "hV4"),
        # IT pools the left and right LOC masks
        "IT": np.concatenate(
            (get_ROI_voxel_id(voxdata, "lLOC"), get_ROI_voxel_id(voxdata, "rLOC"))
        ),
    }

    roi_means = {}
    for roi, voxel_ids in roi_voxel_ids.items():
        in_roi = responses["voxel_id"].isin(voxel_ids)
        # mean over voxels (axis=0) gives one value per column; the voxel_id
        # column is removed by label so the result does not depend on where
        # that column sits in the table
        roi_means[roi] = responses[in_roi].mean(axis=0).drop("voxel_id").to_numpy()

    # rows = ROIs, columns = trials
    responses_df = pd.DataFrame(roi_means).T
    return responses_df
    


## define functions to read stimulus csv
1. `get_stim_table` reads a subject's stimulus metadata csv file.
2. `get_stim_id_ranked` sorts that table by stimulus name, so that all subjects share the same image order and can be aligned in further analyses.

In [4]:
def get_stim_table(betas_csv_dir, sub):
    """Read one subject's stimulus metadata table.

    Parameters
    ----------
    betas_csv_dir : str
        Directory containing ``sub-{sub}_StimulusMetadata.csv``.
    sub : str
        Subject label used in the file name (e.g. ``'01'``).

    Returns
    -------
    pandas.DataFrame
        The csv content, one row per trial.
    """
    stim_f = pjoin(betas_csv_dir, f'sub-{sub}_StimulusMetadata.csv')
    stimdata = pd.read_csv(stim_f)
    return stimdata


def get_stim_id_ranked(betas_csv_dir, sub):
    """Return the stimulus table sorted alphabetically by ``stimulus``.

    The ``stimulus`` column is converted to an ordered categorical whose
    categories are the alphabetically sorted unique names. Rows that share a
    stimulus keep their original relative order, so the result is the same on
    every run.

    Parameters
    ----------
    betas_csv_dir : str
        Directory containing ``sub-{sub}_StimulusMetadata.csv``.
    sub : str
        Subject label used in the file name (e.g. ``'01'``).

    Returns
    -------
    pandas.DataFrame
        The stimulus table with rows reordered; the original index labels are
        kept.
    """
    stimdata = get_stim_table(betas_csv_dir, sub)
    names = stimdata["stimulus"]
    # Spell the category order out explicitly instead of relying on what
    # Series.unique() returns for a categorical column.
    alphabetical = sorted(names.dropna().unique())
    stimdata["stimulus"] = pd.Categorical(names, categories=alphabetical, ordered=True)
    # A stable sort keeps repeated presentations of an image in trial order.
    stimdata = stimdata.sort_values(by="stimulus", kind="stable")
    return stimdata


## define function to get final beta values of training and testing stimulus for each subject

1. function `get_averaged_responses` is a function to get the averaged responses based on the calculation results from previous functions. The final output would be two dataframes, one for training data, the other for testing data.

In [5]:
def get_averaged_responses(responses_df, stimulus_table):
    """Build per-image ROI betas for the train and test sets of one subject.

    Parameters
    ----------
    responses_df : pandas.DataFrame
        ROI-by-trial betas with rows labelled ``V1``, ``V2``, ``hV4`` and
        ``IT``; column position ``k`` holds trial ``k``.
    stimulus_table : pandas.DataFrame
        Stimulus metadata with ``trial_id``, ``stimulus`` and ``trial_type``
        columns, already in the desired output order.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(aligned_df_train, aligned_df_test)``, both indexed by image name
        with one column per ROI. The first has one row for every image in the
        table (for images shown more than once, the last listed trial is
        used). The second has one row per image that has at least one
        ``test`` trial, holding the mean over all rows of that image.
    """
    roi_names = ["V1", "V2", "hV4", "IT"]

    # Pair each stimulus row with its betas by position. Going through the
    # index labels instead would silently mispair rows whenever the table's
    # index is not identical to trial_id.
    trial_ids = stimulus_table["trial_id"].to_numpy()
    roi_values = responses_df.iloc[:, trial_ids].T[roi_names].to_numpy()

    stimuli = stimulus_table["stimulus"].to_numpy()
    is_test = (stimulus_table["trial_type"].astype(str) == "test").to_numpy(dtype=bool)

    # Collect the row positions of every image once instead of re-filtering
    # the whole table for each test trial.
    rows_by_pic = {}
    for row, pic in enumerate(stimuli):
        rows_by_pic.setdefault(pic, []).append(row)

    # last listed trial of each image
    aligned_dict_train = {pic: roi_values[rows[-1]] for pic, rows in rows_by_pic.items()}
    # mean over all trials of each image that was shown as a test image
    aligned_dict_test = {
        pic: roi_values[rows_by_pic[pic]].mean(axis=0)
        for pic in dict.fromkeys(stimuli[is_test])
    }

    # convert the dictionaries to dataframes (rows = images, columns = ROIs)
    aligned_df_train = pd.DataFrame.from_dict(aligned_dict_train, orient="index", columns=roi_names)
    aligned_df_test = pd.DataFrame.from_dict(aligned_dict_test, orient="index", columns=roi_names)
    return aligned_df_train, aligned_df_test


## averaging across three subjects
This step is to average the beta values across three subjects, so that we can get the final beta values for each stimulus.

In [6]:
# Accumulators filled subject by subject in the averaging loop below:
# axis 0 = rows of a subject's train/test table, axis 1 = the 4 ROI columns,
# axis 2 = subject position in sub_pool. The row counts (8740 / 100) are
# hard-coded and must match the tables returned for each subject.
train = np.zeros((8740, 4, 3))
test = np.zeros((100, 4, 3))
# subject labels as they appear in the data file names
sub_pool = ['01', '02', '03']
def get_each_subj_betas_values(betas_csv_dir, sub):
    """Run the full per-subject pipeline and return its train and test tables.

    Chains ``get_betas`` (ROI-averaged betas), ``get_stim_id_ranked`` (sorted
    stimulus table) and ``get_averaged_responses``.

    Parameters
    ----------
    betas_csv_dir : str
        Directory holding the subject's data files.
    sub : str
        Subject label used in the file names (e.g. ``'01'``).

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train, test)`` exactly as returned by ``get_averaged_responses``.
    """
    roi_betas = get_betas(betas_csv_dir, sub)
    ranked_stimuli = get_stim_id_ranked(betas_csv_dir, sub)
    per_image_train, per_image_test = get_averaged_responses(roi_betas, ranked_stimuli)
    return per_image_train, per_image_test

In [6]:
def _match_stimulus_order(betas_df, reference_index, sub):
    """Return ``betas_df`` with its rows in ``reference_index`` order.

    Raises ``ValueError`` when the subject does not cover exactly the same
    stimuli, because a positional average across subjects would then mix
    different images.
    """
    if betas_df.index.equals(reference_index):
        return betas_df
    if set(betas_df.index) != set(reference_index):
        raise ValueError(f"sub-{sub} does not cover the same stimuli as the first subject")
    return betas_df.loc[reference_index]


# The stimulus order of the first subject is the reference for all others.
train_index = None
test_index = None
for index, sub in enumerate(sub_pool):
    aligned_train_df, aligned_test_df = get_each_subj_betas_values(betas_csv_dir, sub)
    if train_index is None:
        train_index, test_index = aligned_train_df.index, aligned_test_df.index
    else:
        # The values are stacked positionally below, so make sure that row k
        # is the same image for every subject.
        aligned_train_df = _match_stimulus_order(aligned_train_df, train_index, sub)
        aligned_test_df = _match_stimulus_order(aligned_test_df, test_index, sub)
    train[:,:,index] = aligned_train_df.to_numpy()
    test[:,:,index] = aligned_test_df.to_numpy()

# average over the subject axis
averaged_train = np.mean(train, axis=2)
averaged_test = np.mean(test, axis=2)

averaged_train_df = pd.DataFrame(averaged_train, columns=["V1", "V2", "hV4", "IT"], index=train_index)
averaged_test_df = pd.DataFrame(averaged_test, columns=["V1", "V2", "hV4", "IT"], index=test_index)

print(averaged_train_df, averaged_test_df)

  responses = pd.read_hdf(data_file)  # this may take a minute


## Additional step to get the individual FC values

In [9]:
train = np.zeros((8740, 4, 3))
test = np.zeros((100, 4, 3))
sub_pool = ['01', '02', '03']
pd_corr = pd.DataFrame(columns=["sub", "v1_v2_train_fc", "v1_v2_test_fc", "v2_v4_train_fc", "v2_v4_test_fc", "v4_it_train_fc", "v4_it_test_fc"])
# neighbouring ROI pairs, listed in the same order as the result columns
roi_pairs = [("V1", "V2"), ("V2", "hV4"), ("hV4", "IT")]
for index, sub in enumerate(sub_pool):
    single_subj_train, single_subj_test = get_each_subj_betas_values(betas_csv_dir, sub)
    fc_row = [sub]
    for roi_a, roi_b in roi_pairs:
        # Pearson correlation across images, train table first, then test table
        for betas in (single_subj_train, single_subj_test):
            fc_row.append(np.corrcoef(betas[roi_a], betas[roi_b])[0, 1])
    # one row of correlation values per subject
    pd_corr.loc[index] = fc_row
print(pd_corr)

  responses = pd.read_hdf(data_file)  # this may take a minute
  responses = pd.read_hdf(data_file)  # this may take a minute
  responses = pd.read_hdf(data_file)  # this may take a minute


  sub  v1_v2_train_fc  v1_v2_test_fc  v2_v4_train_fc  v2_v4_test_fc  \
0  01        0.841729       0.808644        0.861250       0.787666   
1  02        0.800027       0.837186        0.796521       0.692329   
2  03        0.829808       0.841350        0.818807       0.706831   

   v4_it_train_fc  v4_it_test_fc  
0        0.721607       0.488990  
1        0.811766       0.871636  
2        0.658672       0.651595  


In [12]:


# Split-half check: the "top" columns hold the FC of a random half of the
# stimuli, the "bottom" columns the FC of the remaining stimuli.
pd_corr_random = pd.DataFrame(columns = ["sub", "v1_v2_top_fc", "v1_v2_bottom_fc", "v2_v4_top_fc", "v2_v4_bottom_fc", "v4_it_top_fc", "v4_it_bottom_fc"])
for index, sub in enumerate(sub_pool):
    single_subj_train, single_subj_test = get_each_subj_betas_values(betas_csv_dir, sub)
    # Randomly select 50% of the stimuli. The size is derived from the table
    # rather than hard-coded, so it stays a half for any number of stimuli.
    # No seed is set, so the split differs between runs.
    half_size = len(single_subj_train.index) // 2
    random_stim = np.random.choice(single_subj_train.index, size=half_size, replace=False)
    # the remaining stimuli form the second half
    rest_stim = np.setdiff1d(single_subj_train.index, random_stim)
    # select each half once and reuse it for all three ROI pairs
    random_half = single_subj_train.loc[random_stim]
    rest_half = single_subj_train.loc[rest_stim]
    v1_v2_top_fc = np.corrcoef(random_half["V1"], random_half["V2"])[0,1]
    v2_v4_top_fc = np.corrcoef(random_half["V2"], random_half["hV4"])[0,1]
    v4_it_top_fc = np.corrcoef(random_half["hV4"], random_half["IT"])[0,1]
    v1_v2_bottom_fc = np.corrcoef(rest_half["V1"], rest_half["V2"])[0,1]
    v2_v4_bottom_fc = np.corrcoef(rest_half["V2"], rest_half["hV4"])[0,1]
    v4_it_bottom_fc = np.corrcoef(rest_half["hV4"], rest_half["IT"])[0,1]
    # one row of correlation values per subject
    pd_corr_random.loc[index] = [sub, v1_v2_top_fc, v1_v2_bottom_fc, v2_v4_top_fc, v2_v4_bottom_fc, v4_it_top_fc, v4_it_bottom_fc]
print(pd_corr_random)

  responses = pd.read_hdf(data_file)  # this may take a minute
  responses = pd.read_hdf(data_file)  # this may take a minute
  responses = pd.read_hdf(data_file)  # this may take a minute


  sub  v1_v2_top_fc  v1_v2_bottom_fc  v2_v4_top_fc  v2_v4_bottom_fc  \
0  01      0.840363         0.842949      0.859026         0.863281   
1  02      0.788927         0.810224      0.789186         0.803275   
2  03      0.821579         0.837892      0.818166         0.819586   

   v4_it_top_fc  v4_it_bottom_fc  
0      0.717762         0.725091  
1      0.801838         0.821030  
2      0.650278         0.666983  


## Additional step: get the categorical information of the stimuli

In [None]:
def get_stimulus_name(stimdata):
    """Return each stimulus file name with its last 8 characters removed.

    For a name such as ``acorn_01b.jpg`` the removed part is ``_01b.jpg``.

    Parameters
    ----------
    stimdata : pandas.DataFrame
        Table with a ``stimulus`` column of file names.

    Returns
    -------
    numpy.ndarray
        One truncated name per row, in row order.
    """
    suffix_length = 8
    truncated = []
    for file_name in stimdata["stimulus"].values:
        truncated.append(file_name[:-suffix_length])
    return np.array(truncated)

['alligator' 'altar' 'ashtray' 'axe' 'bamboo' 'banana' 'beachball' 'bean'
 'beaver' 'bed' 'beer' 'bench' 'bike' 'blind' 'boa' 'boat' 'bobsled'
 'brace' 'brownie' 'bulldozer' 'butterfly' 'candelabra' 'cheese' 'chest1'
 'chipmunk' 'clipboard' 'coat_rack' 'cookie' 'cow' 'crank' 'crayon'
 'cufflink' 'donut' 'dough' 'dragonfly' 'drain' 'drawer' 'dress' 'earring'
 'easel' 'ferris_wheel' 'footprint' 'fudge' 'graffiti' 'grape' 'grate'
 'guacamole' 'headlamp' 'helicopter' 'hippopotamus' 'horse' 'horseshoe'
 'hovercraft' 'hula_hoop' 'iguana' 'jam' 'jar' 'joystick' 'kazoo' 'key'
 'kimono' 'lasagna' 'lemonade' 'mango' 'marshmallow' 'microscope' 'monkey'
 'mosquito_net' 'mousetrap' 'nest' 'pacifier' 'pan' 'peach' 'pear' 'piano'
 'pumpkin' 'quill' 'rabbit' 'ribbon' 'seesaw' 'shredder' 'sim_card'
 'speaker' 'spoon' 'stalagmite' 'starfish' 'streetlight' 't-shirt'
 'tamale' 'television' 'tent' 'typewriter' 'umbrella' 'uniform' 'urinal'
 'wallpaper' 'wasp' 'watch' 'whip' 'wig']


### Copy pics to folder

In [None]:
pic_path = "/mnt/c/Users/Wayne/Desktop/FCAnet_stimulus/sub-01/"

# Flatten the per-category image folders: copy every file found anywhere
# below pic_path into one single folder.
import os
import shutil

# Specify the source directory containing subfolders
source_dir = pic_path

# Specify the destination directory the files will be copied into
destination_dir = '/mnt/c/Users/Wayne/Desktop/all_pics/'

# Create the destination directory (and any missing parents) if needed
os.makedirs(destination_dir, exist_ok=True)

# Iterate through each subdirectory in the source directory
for root, dirs, files in os.walk(source_dir):
    for file in files:
        # Construct the full path of the file
        file_path = os.path.join(root, file)
        # Copy (not move) the file; files that share a name in different
        # subfolders overwrite each other in the destination
        shutil.copy(file_path, destination_dir)

In [None]:
train_dir = '/mnt/c/Users/Wayne/Desktop/FCAnet_stimulus/train'
test_dir = '/mnt/c/Users/Wayne/Desktop/FCAnet_stimulus/test'


def _copy_renumbered(stim_names, src_dir, dst_dir, label):
    """Copy the named images from ``src_dir`` to ``dst_dir`` as 0000.jpg, 0001.jpg, ...

    The number given to each file is its position in ``stim_names``, so file
    ``k`` corresponds to row ``k`` of the table the names were taken from.
    ``label`` is only used as a prefix in the progress output.
    """
    # Without an existing target directory, shutil.copy would create a plain
    # file with the directory's name instead of copying into it.
    os.makedirs(dst_dir, exist_ok=True)
    for index, i in enumerate(stim_names):
        print(f"{label} {i}")
        new_name = f"{index:04d}.jpg"
        # copy straight to the final name instead of copying and then renaming
        shutil.copy(os.path.join(src_dir, i), os.path.join(dst_dir, new_name))


# file numbers follow the row order of aligned_train_df / aligned_test_df
_copy_renumbered(aligned_train_df.index, destination_dir, train_dir, "train")
_copy_renumbered(aligned_test_df.index, destination_dir, test_dir, "test")


train acorn_01b.jpg
train acorn_02n.jpg
train acorn_03s.jpg
train acorn_04s.jpg
train acorn_05s.jpg
train acorn_06s.jpg
train acorn_07s.jpg
train acorn_08s.jpg
train acorn_09s.jpg
train acorn_10s.jpg
train acorn_11s.jpg
train acorn_12s.jpg
train airbag_01b.jpg
train airbag_02s.jpg
train airbag_03s.jpg
train airbag_04s.jpg
train airbag_05s.jpg
train airbag_06s.jpg
train airbag_07s.jpg
train airbag_08s.jpg
train airbag_09s.jpg
train airbag_10s.jpg
train airbag_11s.jpg
train airbag_12s.jpg
train aircraft_carrier_01b.jpg
train aircraft_carrier_02s.jpg
train aircraft_carrier_03s.jpg
train aircraft_carrier_04s.jpg
train aircraft_carrier_05s.jpg
train aircraft_carrier_06s.jpg
train aircraft_carrier_07s.jpg
train aircraft_carrier_08s.jpg
train aircraft_carrier_09s.jpg
train aircraft_carrier_10s.jpg
train aircraft_carrier_11s.jpg
train aircraft_carrier_12s.jpg
train airplane_01b.jpg
train airplane_02n.jpg
train airplane_03n.jpg
train airplane_04n.jpg
train airplane_05n.jpg
train airplane_06n.jp

In [None]:
# save the averaged_train_df and averaged_test_df to csv
# averaged_train_df.to_csv(pjoin(betas_csv_dir, 'averaged_train_df.csv'))
# averaged_test_df.to_csv(pjoin(betas_csv_dir, 'averaged_test_df.csv'))

The `sub-{subject}_ResponseData.h5` files contain the actual single trial responses. Rows are voxels, columns are trials.

> 🚨 **Trial types**
>
> The THINGS-fMRI experiment presented participants with three different trial types:
> - `train`: Participants passively viewed an object image.
> - `test`: Same as train, but these trials belonged to a set of 100 images which were presented in each session. Its main purpose is to allow for estimating the reliability of the single trial responses in a given voxel.
> - `catch`: Participants saw a non-object image and responded with a button press. This was included to ensure participants were engaged throughout the experiment.
>
> Note: Catch trials are excluded from the single trial responses in table format as they are likely not of interest for most applications. However, catch trials are included in the volumetric format in order to make it possible to account for them in analyses.

## Additional step: get the voxel-level information of the betas to calculate rsa in the next step
workflow:
1. read the h5 file to get the data of all voxels and trials
2. read the stimulus data and sort it in alphabetical order
3. get the trial ids of the test data
4. map them to the voxel data to get the test trials of each voxel.

In [14]:
# NOTE(review): `sub` is not assigned in this cell; it holds whatever value the
# most recently executed loop over sub_pool left behind - confirm which subject
# is intended here.
data_file = pjoin(betas_csv_dir, f'sub-{sub}_ResponseData.h5')
responses = pd.read_hdf(data_file)
# spot-check a single entry, addressed by position (row 7942, column 1930)
print(responses.iloc[7942, 1930])

  responses = pd.read_hdf(data_file)


-0.0077116936


In [None]:
import collections
# voxel metadata of the subject currently held in `sub`
vox_f = pjoin(betas_csv_dir, f'sub-{sub}_VoxelMetadata.csv')
voxdata = pd.read_csv(vox_f)
# voxel ids of every ROI mask; IT is the union of left and right LOC
v1_voxel_id = get_ROI_voxel_id(voxdata, "V1")
v2_voxel_id = get_ROI_voxel_id(voxdata, "V2")
hv4_voxel_id = get_ROI_voxel_id(voxdata, "hV4")
it_l_voxel_id = get_ROI_voxel_id(voxdata, "lLOC")
it_r_voxel_id = get_ROI_voxel_id(voxdata, "rLOC")
it_voxel_id = np.concatenate((it_l_voxel_id, it_r_voxel_id))
four_regions_voxel_id = np.concatenate((v1_voxel_id, v2_voxel_id, hv4_voxel_id, it_voxel_id))
# voxel ids that occur more than once in the combined list
voxel_id_counts = collections.Counter(four_regions_voxel_id)
repeated_voxel_id = np.array(
    [voxel for voxel, occurrences in voxel_id_counts.items() if occurrences > 1]
)
print(repeated_voxel_id)

[ 23541  26734  26735  30070  30071  30111  30153  33493  33494  33533
  33534  33621  44622  44623 156054 156103 156104 156155 159926 159970
 159971 160018 160019 160020 160021 160070 160071 160122 163817 163818
 163819 163862 163863 163864 163865 163914 167585 167631 171322 181781
 181782 185048]


In [13]:
def get_memebership(voxel_id, v1_voxel_id, v2_voxel_id, hv4_voxel_id, it_voxel_id):
    """Return the ROI label of a voxel.

    Labels are assigned with the priority V1, V2, then hV4/IT. A voxel listed
    in both hV4 and IT (and in neither V1 nor V2) is labelled ``"hV4_IT"``.

    Parameters
    ----------
    voxel_id : int
        Id of the voxel to label.
    v1_voxel_id, v2_voxel_id, hv4_voxel_id, it_voxel_id : container of int
        Voxel ids of each ROI; anything supporting ``in`` works.

    Returns
    -------
    str
        ``"V1"``, ``"V2"``, ``"hV4_IT"``, ``"hV4"``, ``"IT"``, or the string
        ``"None"`` when the voxel is in none of the ROIs.
    """
    if voxel_id in v1_voxel_id:
        return "V1"
    if voxel_id in v2_voxel_id:
        return "V2"
    in_hv4 = voxel_id in hv4_voxel_id
    in_it = voxel_id in it_voxel_id
    # The overlap has to be tested before the single-ROI cases, otherwise it
    # can never be reached.
    if in_hv4 and in_it:
        return "hV4_IT"
    if in_hv4:
        return "hV4"
    if in_it:
        return "IT"
    return "None"


def get_voxel_betas(betas_csv_dir, sub, trial_type="test"):
    """Return per-image mean betas for every voxel of the four visual ROIs.

    Parameters
    ----------
    betas_csv_dir : str
        Directory holding the subject's ``ResponseData.h5``,
        ``VoxelMetadata.csv`` and ``StimulusMetadata.csv`` files.
    sub : str
        Subject label used in the file names (e.g. ``'01'``).
    trial_type : str, optional
        Only trials whose ``trial_type`` equals this value are used
        (default ``"test"``).

    Returns
    -------
    pandas.DataFrame
        One row per voxel that belongs to V1, V2, hV4 or IT (lLOC + rLOC),
        keeping the row index of the response table. Columns ``'0000'``,
        ``'0001'``, ... hold the mean beta over the trials of one image, with
        images in alphabetical order; the last column ``voxel_labels`` holds
        the ROI label from ``get_memebership``.
    """
    data_file = pjoin(betas_csv_dir, f'sub-{sub}_ResponseData.h5')
    responses = pd.read_hdf(data_file)  # this may take a minute
    # find voxel ids for each ROI
    vox_f = pjoin(betas_csv_dir, f'sub-{sub}_VoxelMetadata.csv')
    voxdata = pd.read_csv(vox_f)
    v1_voxel_id = get_ROI_voxel_id(voxdata, "V1")
    v2_voxel_id = get_ROI_voxel_id(voxdata, "V2")
    hv4_voxel_id = get_ROI_voxel_id(voxdata, "hV4")
    it_l_voxel_id = get_ROI_voxel_id(voxdata, "lLOC")
    it_r_voxel_id = get_ROI_voxel_id(voxdata, "rLOC")
    it_voxel_id = np.concatenate((it_l_voxel_id, it_r_voxel_id))

    # keep only the voxels that belong to one of the four regions
    four_regions_voxel_id = np.concatenate((v1_voxel_id, v2_voxel_id, hv4_voxel_id, it_voxel_id))
    responses_visual = responses[responses["voxel_id"].isin(four_regions_voxel_id)]
    # label each voxel with its region
    voxel_labels = [
        get_memebership(voxel, v1_voxel_id, v2_voxel_id, hv4_voxel_id, it_voxel_id)
        for voxel in responses_visual["voxel_id"].values
    ]

    # Remove the id column before selecting trials by position, so that
    # column position k is trial k - the same convention get_betas uses when
    # it discards the voxel_id entry. With the id column left in place, every
    # trial_id would select the neighbouring trial and trial 0 would select
    # the voxel ids themselves.
    # (assumes voxel_id is the only non-trial column, as get_betas does)
    trial_betas = responses_visual.drop(columns="voxel_id")

    # sorted stimulus table, restricted to the requested trial type
    tmp_sorted_stimulus = get_stim_id_ranked(betas_csv_dir, sub)
    stimulus = tmp_sorted_stimulus[tmp_sorted_stimulus["trial_type"] == trial_type]

    # collect the trial ids of every image in one pass over the table
    trial_ids_by_pic = {}
    for pic, trial_id in zip(stimulus["stimulus"].to_numpy(), stimulus["trial_id"].to_numpy()):
        trial_ids_by_pic.setdefault(pic, []).append(trial_id)
    # unique image names in alphabetical order
    pic_names = sorted(trial_ids_by_pic)

    # rows = voxels, columns = images
    betas_all_voxels = np.ones((len(trial_betas.index), len(pic_names)))
    for index, pic in enumerate(pic_names):
        # average the trials that showed this image
        betas_all_voxels[:, index] = trial_betas.iloc[:, trial_ids_by_pic[pic]].mean(axis=1)
    betas_all_voxels_df = pd.DataFrame(
        betas_all_voxels,
        # zero-padded image numbers as column names
        columns=['{:04d}'.format(i) for i in range(0, len(pic_names))],
        index=responses_visual.index,
    )
    # add the ROI label of every voxel
    betas_all_voxels_df["voxel_labels"] = voxel_labels
    return betas_all_voxels_df

# iterate over all subjects and export each one's voxel-level betas
# (no averaging across subjects happens here)
sub_pool = ['01', '02', '03']
# test_voxels_all_subs = np.zeros((211339, 100, 3))
for index, sub in enumerate(sub_pool):
    # per-image mean betas of the training trials for every ROI voxel
    _sub = get_voxel_betas(betas_csv_dir, sub, trial_type="train")
    # save as csv in betas_csv_dir, one file per subject
    _sub.to_csv(pjoin(betas_csv_dir, f'sub-{sub}_all_voxels_train_betas.csv'))

  responses = pd.read_hdf(data_file)  # this may take a minute
  responses = pd.read_hdf(data_file)  # this may take a minute
  responses = pd.read_hdf(data_file)  # this may take a minute


In [None]:
# quick look at the zero-padded 4-digit format used for the image column names
print(['{:04d}'.format(i) for i in range(0, 10)])

['0000', '0001', '0002', '0003', '0004', '0005', '0006', '0007', '0008', '0009']
