In [17]:
import os
import json
import numpy as np
import config,consts,paths
from decoding.utils_stim import get_wordseqs, get_roi_features
from decoding.utils_resp import get_resp
from encoding.DataSequence import DataSequence
from encoding.util import make_delayed
from encoding.ridge import bootstrap_ridge
from utils import flatten_list, save_data


# Create reference data: the cells below fit and save the word-rate decoder.

In [18]:
# Subject identifier; later cells use it to build the ROI file path, to load
# responses, and to format the output path.
subject = 'UTS03'

In [19]:
# load data
# Story list (flattened from consts.STORIES) and the word sequences for those
# stories; wordseqs is indexed by story name further down.
stories = flatten_list(consts.STORIES)
wordseqs = get_wordseqs(stories, "story")

# ROI definitions for this subject live in <DATA_TRAIN_DIR>/ROIs/<subject>.json.
roi_json_path = os.path.join(config.DATA_TRAIN_DIR, "ROIs", f"{subject}.json")
with open(roi_json_path, "r") as roi_file:
    roi_voxels = json.load(roi_file)

In [21]:
# features
# The stacked responses for all stories are handed straight to
# get_roi_features, so the full response array is never bound to a name here.
resp_roi, tr_stats = get_roi_features(
    get_resp(subject, stories, "story", stack=True),
    roi_voxels,
    consts.ROI,
)

# make_delayed is applied with config.RESP_DELAYS; the result is the regressor
# matrix passed to bootstrap_ridge when the model is fitted.
delresp = make_delayed(resp_roi, config.RESP_DELAYS)

In [22]:
# target
# Per-story word-rate series: one unit value per word time, wrapped in a
# DataSequence and reduced with chunksums("lanczos", window=3).
# (presumably this resamples word counts onto TR times — confirm against DataSequence)
rates = {}
for story in stories:
    seq = wordseqs[story]
    impulses = np.ones(len(seq.data_times))
    word_seq = DataSequence(impulses, seq.split_inds, seq.data_times, seq.tr_times)
    rates[story] = word_seq.chunksums("lanczos", window=3)

# Trim each story's series, then stack all stories into a single column vector.
# NOTE(review): the extra leading offset of 5 is hard-coded; presumably it
# mirrors trimming applied to the responses — confirm.
# NOTE(review): if config.STRIM were 0 the stop index would be -0 and the slice
# would be empty — confirm STRIM is always positive.
trimmed = [rates[story][5 + config.STRIM:-config.STRIM] for story in stories]
stacked = np.concatenate(trimmed, axis=0).reshape(-1, 1)
nz_rate = np.nan_to_num(stacked)  # replace NaNs before computing the mean

# The regression target is the mean-centered rate; the mean is kept so it can
# be stored alongside the fitted weights.
mean_rate = nz_rate.mean()
rate = nz_rate - mean_rate

In [23]:
# Output path for the fitted word-rate decoder: the paths.WR template filled in
# with the subject identifier. (os.path.join with a single argument returns
# that argument unchanged, so the formatted template is used directly.)
save_location = paths.WR % subject

In [24]:
# estimate word rate model
# Fit bootstrap_ridge from the delayed ROI responses (delresp) to the
# mean-centered word rate (rate). Three values are returned; only the first
# (the weights) is kept.
weights, _, _ = bootstrap_ridge(
    delresp,
    rate,
    alphas=config.ALPHAS_WR,
    nboots=config.NBOOTS,
    chunklen=config.CHUNKLEN,
    use_corr=False,
    seed=42,  # fixed seed passed to bootstrap_ridge
)

# Everything stored with the decoder: the fitted weights, the mean that was
# subtracted from the target, the ROI spec, the training stories, and the
# statistics returned by get_roi_features.
wr_decoder = {
    "weights": weights,
    "mean": mean_rate,
    "rois": consts.ROI,
    "stories": stories,
    "tr_stats": tr_stats,
}

# Write to the path already computed as save_location rather than rebuilding
# it here, so the output location has a single source of truth.
save_data(save_location, wr_decoder)