In [18]:
import os
import json
import argparse
import numpy as np
from config import DATA_DIR
import h5py
from utils import flatten_list, save_data
import config, consts, paths
from encoding.ridge import bootstrap_ridge
from decoding.StimulusModel import StimulusModel, get_lanczos_mat, affected_trs, LMFeatures
from decoding.utils_stim import predict_word_rate, predict_word_times
from decoding.utils_resp import get_resp, get_resp_test
from utils import nsort, flatten_list
from decoding.GPT import GPT
from encoding.npp import zscore
from decoding.Decoder import Decoder, Hypothesis
from decoding.LanguageModel import LanguageModel
from decoding.EncodingModel import EncodingModel

In [2]:
stories = flatten_list(consts.STORIES)
stimuli = stories
modality = "story"
goal = "UTS03"
exclude = None
references = ["UTS02"]#, "UTS03" "UTS09"]

In [None]:
def get_reverse_corrs(goal, references, resp, cache = None):
    """select goal voxels by fitting reverse converters from references to goal
    """
       
    # fit converters from references to goal
    rconverters = {}
    for reference in references:
        rvox = np.load(paths.EM % reference, allow_pickle = True).item()['voxels']
        gresp_align = resp[goal]
        rresp_align = resp[reference][:, rvox]
        converter, _, _ = bootstrap_ridge(rresp_align, gresp_align, alphas = config.ALPHAS,
                nboots = config.NBOOTS, chunklen = config.CHUNKLEN, use_corr = True, seed = 42)            
        rconverters[reference] = (converter, rvox)        

    # compare aligned responses across converters
    stories = flatten_list(consts.STORIES[3:])
    reverse_corrs = []
    for story in stories:
        rsim = []
        for reference in references:
            rvox = rconverters[reference][1]
            rresp = get_resp(reference, [story], "story", stack = True, voxels = rvox)
            rsim.append(zscore(rresp.dot(rconverters[reference][0])))
        for c1 in range(len(rsim)):
            for c2 in range(c1+1, len(rsim)):
                reverse_corrs.append((rsim[c1] * rsim[c2]).mean(0))
    return np.mean(reverse_corrs, axis = 0)


In [36]:
run_on_set = []
for story in stories:
		temp= config.DATA_DIR+'/derivative/preprocessed_data/'+references[0]
		resp_path = os.path.join(temp,"%s.hf5" % story)
		hf = h5py.File(resp_path, "r")
		if not run_on_set:
			run_on_set.append(hf["data"][:].shape[0])
		else:
			run_on_set.append(run_on_set[-1]+hf["data"][:].shape[0])
		hf.close()

run_on_set=run_on_set[:-1]

In [4]:
# load converter data
resp_align = {}
for subject in consts.SUBJECTS:
    #resp_full = get_resp(subject, stories, "story", stack = True)
    resp_align[subject] = get_resp(subject, stimuli, modality, stack = True)
reverse_corrs = get_reverse_corrs(goal, [subject for subject in consts.SUBJECTS if subject != goal], resp_align)
if exclude is not None:
    exclude_mask = np.load(paths.ROI % (goal, exclude))
    reverse_corrs[exclude_mask] = -1
gvox = nsort(np.argsort(reverse_corrs)[-15000:])

[]


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [None]:
reference ="UTS02"
rvox = np.load(paths.EM % reference, allow_pickle = True).item()['voxels']
gresp_align = resp_align[goal][:, gvox]
rresp_align = resp_align[reference][:, rvox]

converter, _, allRcorrs = bootstrap_ridge(gresp_align, rresp_align, alphas = config.ALPHAS, 
        nboots = config.NBOOTS, chunklen = config.CHUNKLEN, use_corr = True, seed = 42)  

In [5]:
# train converters
converters = {}
for reference in references:
    rvox = np.load(paths.EM % reference, allow_pickle = True).item()['voxels']
    gresp_align = resp_align[goal][:, gvox]
    rresp_align = resp_align[reference][:, rvox]
    converter, _, allRcorrs = bootstrap_ridge(gresp_align, rresp_align, alphas = config.ALPHAS, 
            nboots = config.NBOOTS, chunklen = config.CHUNKLEN, use_corr = True, seed = 42)            
    converters[reference] = (converter, gvox, rvox)     

# train model 

In [38]:
from himalaya.kernel_ridge import KernelRidgeCV
import os
import json
from sklearn.preprocessing import StandardScaler
from voxelwise_tutorials.delayer import Delayer
from himalaya.kernel_ridge import KernelRidgeCV
from himalaya.backend import set_backend
from sklearn.pipeline import make_pipeline
from voxelwise_tutorials.utils import generate_leave_one_run_out
from sklearn.model_selection import check_cv
import cortex
import numpy as np
import matplotlib.pyplot as plt

In [None]:
train=gresp_align
X_train = np.array(X_train, dtype=float)
print("(n_samples_train, n_features) =", X_train.shape)

In [None]:
scores_train = pipeline.score(X_train, Y_train)

In [15]:
rresp_align.shape

(1869, 10000)

# enbedding not needed 

In [None]:
# load responses
gresp = get_resp_test(goal, repeat = "first")
# load models
em = {}
wr_data = []
for reference in references:
    wr_data.append(np.load(paths.WR % reference, allow_pickle = True).item())
    em_data = np.load(paths.EM % reference, allow_pickle = True).item()
    converter, gvox, rvox = converters[reference]
    rresp = np.nan_to_num(zscore(gresp[:, gvox].dot(converter)))
    em[reference] = EncodingModel(rresp, em_data["weights"], em_data["noise_model"], device = "cuda")
    em[reference].set_shrinkage(config.NM_ALPHA)
    tr_stats, word_stats = np.array(em_data["tr_stats"]), em_data["word_stats"][0]

In [None]:
# predict word times
with open(os.path.join(config.DATA_TRAIN_DIR, "ROIs", f"{goal}.json"), "r") as f:
    roi_vox = json.load(f)
starttime = -10
word_rate = predict_word_rate(gresp, roi_vox, wr_data)
word_times, tr_times = predict_word_times(word_rate, gresp, starttime = starttime)
lanczos_mat = get_lanczos_mat(word_times, tr_times)