In [9]:
import os
import json
import argparse
import numpy as np
from utils import flatten_list, save_data
import config, consts, paths
from encoding.ridge import bootstrap_ridge
from decoding.StimulusModel import StimulusModel, get_lanczos_mat, affected_trs, LMFeatures
from decoding.utils_stim import predict_word_rate, predict_word_times
from decoding.utils_resp import get_resp, get_resp_test
from utils import nsort, flatten_list
from decoding.GPT import GPT
from encoding.npp import zscore
from decoding.Decoder import Decoder, Hypothesis
from decoding.LanguageModel import LanguageModel
from decoding.EncodingModel import EncodingModel




In [2]:
# Stimulus set: every story listed in consts.STORIES, flattened into one list.
# `stimuli` and `stories` intentionally refer to the same list object.
stimuli = stories = flatten_list(consts.STORIES)
modality = "story"

# Subject being decoded (goal) and the subjects whose fitted models are reused (references).
goal, references = "UTS01", ["UTS02", "UTS03"]

# Optional ROI identifier; when not None it is used to load a mask (paths.ROI)
# whose voxels are pushed to the bottom of the goal-voxel ranking.
exclude = None


In [11]:
def get_reverse_corrs(goal, references, resp, cache = None):
    """Score goal voxels by how consistently the references predict them.

    For every reference subject a "reverse" converter is fit with
    ``bootstrap_ridge`` from that reference's encoding-model voxels (the
    ``'voxels'`` entry of the file at ``paths.EM % reference``) to the goal
    responses ``resp[goal]``. Each converter is then applied to the
    reference's responses for each story in ``consts.STORIES[3:]`` and the
    result is passed through ``zscore``. For every story and every unordered
    pair of references, the two converted responses are multiplied
    element-wise and averaged along axis 0; the returned value is the mean of
    those per-story, per-pair arrays.

    Args:
        goal: key of the goal subject in ``resp``.
        references: reference subject identifiers; at least two are required
            because the score is computed between pairs of references.
        resp: mapping from subject identifier to that subject's response
            matrix (indexed as ``resp[subject][:, voxels]``).
        cache: unused; kept so existing callers that pass it keep working.

    Returns:
        Array of scores, one per column of the converted responses
        (presumably one per goal voxel -- TODO confirm against
        ``bootstrap_ridge``'s weight shape).

    Raises:
        ValueError: if fewer than two references are given. Previously this
            case produced an empty pair list and ``np.mean`` silently
            returned NaN.
    """
    from itertools import combinations

    references = list(references)
    if len(references) < 2:
        raise ValueError(
            "get_reverse_corrs needs at least two references to compare, got %d"
            % len(references))

    # fit converters from references to goal
    gresp_align = resp[goal]  # same target for every reference, so look it up once
    rconverters = {}
    for reference in references:
        # NOTE: allow_pickle = True can execute code embedded in the file; only load trusted model files.
        rvox = np.load(paths.EM % reference, allow_pickle = True).item()['voxels']
        rresp_align = resp[reference][:, rvox]
        converter, _, _ = bootstrap_ridge(rresp_align, gresp_align, alphas = config.ALPHAS,
                nboots = config.NBOOTS, chunklen = config.CHUNKLEN, use_corr = True, seed = 42)
        rconverters[reference] = (converter, rvox)

    # compare aligned responses across converters
    stories = flatten_list(consts.STORIES[3:])
    reverse_corrs = []
    for story in stories:
        rsim = []
        for reference in references:
            converter, rvox = rconverters[reference]
            rresp = get_resp(reference, [story], "story", stack = True, voxels = rvox)
            rsim.append(zscore(rresp.dot(converter)))
        # combinations() yields pairs in the same (c1 < c2) order as a nested index loop
        for sim_a, sim_b in combinations(rsim, 2):
            reverse_corrs.append((sim_a * sim_b).mean(0))
    return np.mean(reverse_corrs, axis = 0)


In [None]:
# load converter data
# Number of top-scoring goal voxels kept for the converters.
N_GOAL_VOXELS = 15000

# Responses of every subject to the alignment stimuli, keyed by subject.
resp_align = {}
for subject in consts.SUBJECTS:
    resp_align[subject] = get_resp(subject, stimuli, modality, stack = True)

# Every subject other than the goal serves as a reference when scoring goal voxels.
reverse_corrs = get_reverse_corrs(goal, [subject for subject in consts.SUBJECTS if subject != goal], resp_align)
if exclude is not None:
    # Voxels inside the excluded ROI get the lowest score so they are never selected.
    exclude_mask = np.load(paths.ROI % (goal, exclude))
    reverse_corrs[exclude_mask] = -1

# np.argsort places NaN after every finite value, so a NaN score would be
# picked as a "best" voxel by the [-N:] slice below. Rank NaN scores at the
# same floor (-1) used for excluded voxels instead.
ranking_scores = np.where(np.isnan(reverse_corrs), -1, reverse_corrs)
gvox = nsort(np.argsort(ranking_scores)[-N_GOAL_VOXELS:])

In [None]:
# train converters
# Ridge settings shared by every goal -> reference converter fit.
ridge_opts = dict(alphas = config.ALPHAS, nboots = config.NBOOTS,
        chunklen = config.CHUNKLEN, use_corr = True, seed = 42)

# converters[reference] = (weights, goal voxel indices, reference voxel indices)
converters = {}
for reference in references:
    # voxels used by this reference's encoding model
    rvox = np.load(paths.EM % reference, allow_pickle = True).item()['voxels']
    # inputs: goal responses on the selected goal voxels; targets: reference responses on rvox
    gresp_align, rresp_align = resp_align[goal][:, gvox], resp_align[reference][:, rvox]
    converter, _, _ = bootstrap_ridge(gresp_align, rresp_align, **ridge_opts)
    converters[reference] = (converter, gvox, rvox)

In [8]:
# load gpt
# The GPT vocabulary and checkpoint both live under the "perceived" sub-directory;
# the decoder vocabulary sits directly in config.DATA_LM_DIR.
perceived_dir = os.path.join(config.DATA_LM_DIR, "perceived")
with open(os.path.join(perceived_dir, "vocab.json"), "r") as f:
    gpt_vocab = json.load(f)
with open(os.path.join(config.DATA_LM_DIR, "decoder_vocab.json"), "r") as f:
    decoder_vocab = json.load(f)

gpt = GPT(path = os.path.join(perceived_dir, "model"), vocab = gpt_vocab, device = "cuda")
# feature extractor used to embed candidate word sequences
features = LMFeatures(model = gpt, layer = config.GPT_LAYER, context_words = config.GPT_WORDS)
# language model used to propose candidate next words during decoding
lm = LanguageModel(gpt, decoder_vocab, nuc_mass = config.LM_MASS, nuc_ratio = config.LM_RATIO)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/genevievelam/Documents/GitHub/Cross-participant-semantic-decoding/data_lm/decoder_vocab.json'

In [None]:
# load responses
# Test responses of the goal subject; `repeat = "first"` is forwarded to get_resp_test.
gresp = get_resp_test(goal, repeat = "first")

# load models
# em: one EncodingModel per reference, keyed by reference subject.
# wr_data: the object stored at paths.WR for each reference, in `references` order.
em = {}
wr_data = []
for reference in references:
    wr_data.append(np.load(paths.WR % reference, allow_pickle = True).item())
    em_data = np.load(paths.EM % reference, allow_pickle = True).item()
    converter, gvox, rvox = converters[reference]
    # Project the goal's test responses (selected goal voxels only) through the
    # converter trained for this reference, apply zscore, and replace any NaN
    # with 0 via np.nan_to_num.
    rresp = np.nan_to_num(zscore(gresp[:, gvox].dot(converter)))
    em[reference] = EncodingModel(rresp, em_data["weights"], em_data["noise_model"], device = "cuda")
    em[reference].set_shrinkage(config.NM_ALPHA)
    # NOTE(review): reassigned on every iteration, so only the last reference's
    # values survive the loop. `em_data` likewise keeps the last reference's
    # contents and is read again by the decoding cell -- confirm that using a
    # single reference's stats there is intended.
    tr_stats, word_stats = np.array(em_data["tr_stats"]), em_data["word_stats"][0]

In [None]:
# predict word times
starttime = -10

# ROI voxel definitions for the goal subject, read from <DATA_TRAIN_DIR>/ROIs/<goal>.json
roi_path = os.path.join(config.DATA_TRAIN_DIR, "ROIs", f"{goal}.json")
with open(roi_path, "r") as f:
    roi_vox = json.load(f)

# word rate -> predicted word/TR times -> Lanczos matrix between the two
word_rate = predict_word_rate(gresp, roi_vox, wr_data)
word_times, tr_times = predict_word_times(word_rate, gresp, starttime = starttime)
lanczos_mat = get_lanczos_mat(word_times, tr_times)

In [None]:
# decode responses
# Walks through the predicted word times one sample at a time. At each step the
# language model proposes continuations for the hypotheses on the decoder's
# beam, every reference encoding model scores them, and the decoder is extended.
decoder = Decoder(word_times, config.WIDTH)
# NOTE(review): `em_data` is whatever the model-loading loop left behind, i.e.
# the data of the last entry in `references` -- confirm its tr/word stats are
# meant to be shared across all references.
sm = StimulusModel(lanczos_mat, em_data["tr_stats"], em_data["word_stats"][0], device = "cuda")
for sample_index in range(len(word_times)):
    trs = affected_trs(decoder.first_difference(), sample_index, lanczos_mat)
    ncontext = decoder.time_window(sample_index, config.LM_TIME, floor = 5)
    beam_nucs = lm.beam_propose(decoder.beam, ncontext)
    # beam_nucs is indexed by position `c`, in the order yielded by decoder.get_hypotheses()
    for c, (hyp, nextensions) in enumerate(decoder.get_hypotheses()):
        nuc, logprobs = beam_nucs[c]
        # nothing was proposed for this hypothesis, so there is nothing to score
        if len(nuc) < 1: continue
        # one candidate word sequence (and embedding) per proposed next word
        extend_words = [hyp.words + [x] for x in nuc]
        extend_embs = list(features.extend(extend_words))
        stim = sm.make_variants(sample_index, hyp.embs, extend_embs, trs)
        # score the same candidate stimuli with each reference's encoding model
        likelihoods = {}
        for reference in references:
            likelihoods[reference] = em[reference].prs(stim, trs)
        # NOTE: despite the name, this is the sum (np.sum) over references, not the mean.
        mean_likelihoods = np.sum([likelihoods[reference] for reference in  references], axis = 0)
        local_extensions = [Hypothesis(parent = hyp, extension = x) for x in zip(nuc, logprobs, extend_embs)]
        decoder.add_extensions(local_extensions, mean_likelihoods, nextensions)
    # verbose is True on every 20th sample only
    decoder.extend(verbose = (sample_index % 20 == 0))
# Saving is disabled; `args` is not defined in the visible notebook cells.
#decoder.save(args.save)