In [None]:
import os
import glob
import numpy as np
import pandas as pd
from config.dir import DATA_DIR, EM_DATA_DIR ,REPO_DIR
from utils.stimulus_utils import load_textgrids, load_simulated_trfiles
from utils.textgrid import TextGrid
from utils.dsutils import make_semantic_model, make_word_ds, make_phoneme_ds
from utils.npp import zscore, mcorr
import json
from utils.SemanticModel import SemanticModel
from utils.interpdata import lanczosinterp2D
import pathlib	
import h5py

In [2]:
#! pip3 install numpy pandas voxelwise_tutorials

# Variables

In [31]:
trim = 5  # rows trimmed from both ends of every story's stimulus (on top of a fixed 5 at the start)
#ndelays = 4
feature ="eng1000"  # key into _FEATURE_CONFIG selecting the feature extractor
subject = 'sub-UTS02'  # subject label; the part after the dash names the response directory
sessions = ['10','11']  # keys into sess_to_story selecting which sessions' stories are used

In [3]:
def get_story_wordseqs(stories):
	"""Build the word-level data sequences for the given stories.

	Loads the stories' TextGrids from ``DATA_DIR``, reads ``respdict.json`` to
	create simulated TR files, and combines both with ``make_word_ds``.

	Args:
		stories: List of story names.

	Returns:
		The object produced by ``make_word_ds``; callers in this notebook
		index it by story name.
	"""
	textgrids = load_textgrids(stories, DATA_DIR)
	respdict_path = os.path.join(DATA_DIR, "ds003020/derivative/respdict.json")
	with open(respdict_path, "r") as fh:
		respdict = json.load(fh)
	simulated_trs = load_simulated_trfiles(respdict)
	return make_word_ds(textgrids, simulated_trs)

In [4]:
# Session id -> [training stories, held-out test story], as indexed by the split loop below.
sess_to_story_path = os.path.join(EM_DATA_DIR, "sess_to_story.json")
with open(sess_to_story_path, "r") as f:
    sess_to_story = json.load(f)

In [6]:
# Gather the training stories of every selected session and the distinct
# held-out test stories, then make sure the two sets never share a story.
train_stories, test_stories = [], []
for sess in sessions:
    stories = sess_to_story[sess][0]
    tstory = sess_to_story[sess][1]
    train_stories += stories
    if tstory not in test_stories:
        test_stories.append(tstory)
assert not (set(train_stories) & set(test_stories)), "Train - Test overlap!"


In [7]:
# Sorted so the story order is the same on every kernel restart; plain set
# iteration order depends on per-process string hashing.
allstories = sorted(set(train_stories) | set(test_stories))

In [8]:
# Outputs for this feature/subject pair go to <REPO_DIR>/results/<feature>/<subject>.
save_location = os.path.join(REPO_DIR, "results", feature, subject)
print("Saving encoding model & results to:", save_location)
os.makedirs(save_location, exist_ok=True)


Saving encoding model & results to: /Users/genevievelam/Documents/GitHub/stories_fmri/results/eng1000/sub-UTS02


In [9]:
def downsample_word_vectors(stories, word_vectors, wordseqs):
	"""Resample each story's word vectors onto its TR times via Lanczos interpolation.

	Args:
		stories: List of stories to obtain vectors for.
		word_vectors: Dictionary of {story: <float32>[num_story_words, vector_size]}
		wordseqs: Dictionary of {story: sequence object} whose values expose
			``data_times`` (source sample times) and ``tr_times`` (target times).

	Returns:
		Dictionary of {story: downsampled vectors}
	"""
	def _resample(story):
		vectors = word_vectors[story]
		seq = wordseqs[story]
		return lanczosinterp2D(vectors, seq.data_times, seq.tr_times, window=3)

	return {story: _resample(story) for story in stories}

In [10]:
def get_eng1000_vectors(allstories):
	"""Compute downsampled Eng1000 (985-d) feature vectors for the given stories.

	Args:
		allstories: List of stories to obtain vectors for.

	Returns:
		Dictionary of {story: downsampled vectors}
	"""
	model_path = os.path.join(EM_DATA_DIR, "english1000sm.hf5")
	eng1000 = SemanticModel.load(model_path)
	wordseqs = get_story_wordseqs(allstories)
	# Project every story's word sequence through the embedding model and
	# keep only the resulting data matrix.
	vectors = {
		story: make_semantic_model(wordseqs[story], [eng1000], [985]).data
		for story in allstories
	}
	return downsample_word_vectors(allstories, vectors, wordseqs)

In [11]:
# Dispatch table: feature name -> function that builds that feature space.
# Only "eng1000" is wired up; the commented-out entries name extractors that
# are not defined in this notebook.
_FEATURE_CONFIG = {
	#"articulation": get_articulation_vectors,
	#"phonemerate": get_phonemerate_vectors,
	#"wordrate": get_wordrate_vectors,
	"eng1000": get_eng1000_vectors,
}

In [12]:
def get_feature_space(feature, *args):
	"""Run the extractor registered for ``feature`` and return its result.

	Args:
		feature: Key into ``_FEATURE_CONFIG``; an unknown key raises ``KeyError``.
		*args: Positional arguments forwarded unchanged to the extractor.

	Returns:
		Whatever the selected extractor returns.
	"""
	extractor = _FEATURE_CONFIG[feature]
	return extractor(*args)

In [32]:
# Build the selected feature space for every train and test story.
downsampled_feat = get_feature_space(feature, allstories)
#print("Stimulus & Response parameters:")
#print("trim: %d, ndelays: %d" % (trim, ndelays))

In [18]:
def convert_to_serializable(downsampled_feat):
    """Convert arrays, or containers of arrays, into JSON-serializable Python objects.

    Args:
        downsampled_feat: An object exposing ``tolist()`` (e.g. a NumPy array
            or scalar), or a dict / list / tuple whose values are themselves
            convertible. Anything else is returned unchanged.

    Returns:
        The same structure with every array replaced by nested Python lists.
        Dict keys are kept as-is, so they must already be JSON-compatible.
    """
    # The original called `.tolist()` unconditionally, which fails on the
    # {story: array} dictionaries this notebook produces.
    if isinstance(downsampled_feat, dict):
        return {
            key: convert_to_serializable(value)
            for key, value in downsampled_feat.items()
        }
    if isinstance(downsampled_feat, (list, tuple)):
        return [convert_to_serializable(item) for item in downsampled_feat]
    if hasattr(downsampled_feat, "tolist"):
        return downsampled_feat.tolist()
    return downsampled_feat


In [20]:
def apply_zscore_and_hrf(stories, downsampled_feat, trim):
	"""Build the trimmed, z-scored stimulus matrix for a list of stories.

	For each story the first ``5 + trim`` rows and the last ``trim`` rows of
	its downsampled features are dropped, the remainder is passed through
	``zscore``, and the per-story results are stacked in ``stories`` order.

	NOTE: despite the name, no HRF / FIR delays are applied here; the
	stimulus is returned undelayed.

	Args:
		stories: List of stimuli stories.
		downsampled_feat (dict): Downsampled feature vectors, keyed by story.
		trim: Number of rows to trim from both ends of each story. ``0`` keeps
			the tail intact (only the fixed 5 leading rows are dropped).

	Returns:
		stim: Stacked array with one row per kept stimulus row and one column
			per feature.
	"""
	# NOTE(review): the reason for the fixed 5 extra leading rows is not
	# visible in this notebook; kept as in the original.
	lead = 5
	# A bare `-trim` end index would produce an empty slice when trim == 0.
	end = -trim if trim else None
	stim = [zscore(downsampled_feat[s][lead + trim:end]) for s in stories]
	return np.vstack(stim)

In [21]:
# NOTE: the "del" prefix suggests a delayed stimulus, but apply_zscore_and_hrf
# currently returns the undelayed, z-scored training stimulus.
delRstim = apply_zscore_and_hrf(train_stories, downsampled_feat, trim)

In [22]:
# (rows, features) of the training stimulus; the row count should match the response matrix.
delRstim.shape

(3705, 985)

In [None]:
def get_response(stories, subject):
	"""Load and stack a subject's preprocessed fMRI responses for the given stories.

	Each story is read from the ``data`` dataset of
	``<DATA_DIR>/ds003020/derivative/preprocessed_data/<ID>/<story>.hf5``,
	where ``<ID>`` is the second dash-separated field of ``subject``
	(``sub-UTS02`` -> ``UTS02``). The row count of every story is printed.

	Args:
		stories: List of story names, in the order their responses are stacked.
		subject: Subject label of the form ``sub-<ID>``.

	Returns:
		Tuple ``(resp, run_on_set)``:
			resp: Array with all stories' rows stacked along the first axis
				(an empty array when ``stories`` is empty).
			run_on_set: List of cumulative row counts, i.e. the row index at
				which each story after the first begins in ``resp``; the
				final total is omitted.
	"""
	#main_path = pathlib.Path(__file__).parent.parent.resolve()
	subject_x = subject.split('-')[1]
	subject_dir = os.path.join(DATA_DIR, "ds003020/derivative/preprocessed_data/%s" % subject_x)
	chunks = []
	for story in stories:
		resp_path = os.path.join(subject_dir, "%s.hf5" % story)
		# The context manager closes the file even when the read raises.
		with h5py.File(resp_path, "r") as hf:
			# Read the dataset once instead of re-slicing it for every use.
			story_resp = hf["data"][:]
		print(story_resp.shape[0], "for story:", story)
		chunks.append(story_resp)
	if not chunks:
		# Same result as the original for an empty story list.
		return np.array([]), []
	# Plain Python ints keep the offsets JSON-serializable.
	run_on_set = []
	total = 0
	for chunk in chunks:
		total += chunk.shape[0]
		run_on_set.append(total)
	return np.concatenate(chunks, axis=0), run_on_set[:-1]

In [26]:
# Response
# Stack the training stories' responses; run_on_set receives the cumulative
# row offsets that get_response returns alongside the data.
zRresp,run_on_set = get_response(train_stories, subject)
print("zRresp: ", zRresp.shape)


326 for story: catfishingstrangerstofindmyself
243 for story: christmas1940
326 for story: gpsformylostidentity
297 for story: singlewomanseekingmanwich
325 for story: superheroesjustforeachother
307 for story: whenmothersbullyback
170 for story: againstthewind
409 for story: bluehope
237 for story: forgettingfear
249 for story: ifthishaircouldtalk
327 for story: lifereimagined
489 for story: stumblinginthedark
zRresp:  (3705, 94251)


In [28]:
# Cumulative row counts after each story except the last (see get_response).
run_on_set

[326, 569, 895, 1192, 1517, 1824, 1994, 2403, 2640, 2889, 3216]

In [27]:
# Persist the story boundary offsets next to the other results for this subject.
# os.path.join matches how every other path in this notebook is built.
with open(os.path.join(save_location, 'run_on.json'), "w") as file:
    json.dump(run_on_set, file, indent=4)

In [None]:
#save_location = os.path.join(REPO_DIR, "results",feature, 'sub-UTS03')
#print("Saving encoding model & results to:", save_location)
#os.makedirs(save_location, exist_ok=True)

#with open(save_location+'/fmri.json', "w") as file:
#        json.dump(convert_to_serializable(zRresp),file, indent=4)

Saving encoding model & results to: /Users/genevievelam/Documents/GitHub/stories_fmri/results/eng1000/sub-UTS03


# Save feature 

In [None]:
#with open(save_location+'/features.json', "w") as file:
#        json.dump(convert_to_serializable(delRstim),file, indent=4)

In [30]:
#with open(save_location+'/fmri.json', "w") as file:
#        json.dump(convert_to_serializable(zRresp),file, indent=4)