In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%reload_ext autoreload

In [3]:
import os

# move to project root
while True:
    # get list of directories
    dirs = os.listdir()
    if "README.md" in dirs:
        break
    else:
        os.chdir("..")
print(os.getcwd())

/home/ra/Codes/multilang_timescale


In [33]:
import json 

import numpy as np 

import matplotlib.pyplot as plt
import seaborn as sns

from src.utils.plot import config_plotting, figsize_dict
from src.utils.meta import read_result_meta
from src.utils.utils import get_valid_voxels
from src.utils.weight import process_primal_weight
from src.settings import TrainerSetting

In [6]:
config_plotting('paper')

In [28]:
image_dir = ".temp/image/semantic_cluster/"

result_metric = "r2"    
result_meta_dir = ".temp/result_meta/bling"
alpha = 0.05

In [29]:
subject_ids = ['COL', 'GFW', 'TYE']

In [22]:
# load voxel cluster for each subject
voxel_clusters = {}
for subject_id in subject_ids:
    fn = f".temp/image/semantic_cluster/{subject_id}/cluster_voxels.npz"
    
    voxel_cluster = np.load(fn, allow_pickle=True) 
    
    valid_en_clusters = np.where(voxel_cluster['en'] > 0)[0]
    voxel_clusters[subject_id]= {
        'en': voxel_cluster['en'],
        'zh': voxel_cluster['zh'],
    }

In [30]:
def load_semantic_weight_data(subject_id):
    subject_file_en = f".temp/config/bling/subject/{subject_id}_en.json"
    subject_file_zh = f".temp/config/bling/subject/{subject_id}_zh.json"

    feature_file_en = f".temp/config/bling/feature/{subject_id}/fasttext_stepwise_en.json"
    feature_file_zh = f".temp/config/bling/feature/{subject_id}/fasttext_stepwise_zh.json"

    trainer_en_file = (
        f".temp/config/bling/train/stepwise/{subject_id.lower()}_en-save_primal.json"
    )
    trainer_zh_file = (
        f".temp/config/bling/train/stepwise/{subject_id.lower()}_zh-save_primal.json"
    )
    # loading semantic vem

    en_meta_df = read_result_meta(
        result_meta_dir,
        trainer_setting_path=trainer_en_file,
        subject_setting_path=subject_file_en,
        feature_setting_path=feature_file_en,
    )
    zh_meta_df = read_result_meta(
        result_meta_dir,
        trainer_setting_path=trainer_zh_file,
        subject_setting_path=subject_file_zh,
        feature_setting_path=feature_file_zh,
    )

    # load trainer json file to TrainerSetting
    with open(trainer_en_file, "r") as f:
        trainer_setting_en = TrainerSetting(**json.load(f))
        
    en_config = en_meta_df.iloc[0].to_dict()
    zh_config = zh_meta_df.iloc[0].to_dict()

    en_stat = en_config["stats_path"]
    en_stat = np.load(en_stat)

    zh_stat = zh_config["stats_path"]
    zh_stat = np.load(zh_stat)

    valid_en_voxel_mask, valid_en_voxel_idx = get_valid_voxels(en_stat, metric=result_metric, alpha=alpha)
    valid_zh_voxel_mask, valid_zh_voxel_idx = get_valid_voxels(zh_stat, metric=result_metric, alpha=alpha)

    valid_voxel_mask = valid_en_voxel_mask & valid_zh_voxel_mask
    valid_voxel_idx = np.intersect1d(valid_en_voxel_idx, valid_zh_voxel_idx)

    en_joint_pred_acc = en_stat[f"test_joint_{result_metric}_score_mask"]
    zh_joint_pred_acc = zh_stat[f"test_joint_{result_metric}_score_mask"]

    # get sqrt
    if result_metric == 'r2':
        en_joint_pred_acc = np.sqrt(en_joint_pred_acc)
        zh_joint_pred_acc = np.sqrt(zh_joint_pred_acc)

    return {
        'en': {
            'config': en_config,
            'stat': en_stat,
            'valid_voxel_mask': valid_en_voxel_mask,
            'valid_voxel_idx': valid_en_voxel_idx,
            'joint_pred_acc': en_joint_pred_acc
        },
        'zh': {
            'config': zh_config,
            'stat': zh_stat,
            'valid_voxel_mask': valid_zh_voxel_mask,
            'valid_voxel_idx': valid_zh_voxel_idx,
            'joint_pred_acc': zh_joint_pred_acc
        }
    }

In [25]:
def load_timescale_data(subject_id):
  

    subject_file_en = f".temp/config/bling/subject/{subject_id}_en.json"
    subject_file_zh = f".temp/config/bling/subject/{subject_id}_zh.json"

    trainer_en_file = (
        f".temp/config/bling/train/stepwise/{subject_id.lower()}_en-save_primal.json"
    )
    trainer_zh_file = (
        f".temp/config/bling/train/stepwise/{subject_id.lower()}_zh-save_primal.json"
    )

    trainer_en_ts_file = (
        f".temp/config/bling/train/stepwise/{subject_id.lower()}_en_timescale.json"
    )

    trainer_zh_ts_file = (
        f".temp/config/bling/train/stepwise/{subject_id.lower()}_zh_timescale.json"
    )

    feature_file_en_ts = f".temp/config/bling/feature/{subject_id}/mBERT_all_untrimmed_timescale_stepwise_en.json"
    feature_file_zh_ts = f".temp/config/bling/feature/{subject_id}/mBERT_all_untrimmed_timescale_stepwise_zh.json"

    # loading timescale vem
    en_meta_df = read_result_meta(
        result_meta_dir,
        trainer_setting_path=trainer_en_ts_file,
        subject_setting_path=subject_file_en,
        feature_setting_path=feature_file_en_ts,)

    zh_meta_df = read_result_meta(
        result_meta_dir,
        trainer_setting_path=trainer_zh_ts_file,
        subject_setting_path=subject_file_zh,
        feature_setting_path=feature_file_zh_ts,
    )

    # load trainer json file to TrainerSetting
    with open(trainer_en_file, "r") as f:
        trainer_setting_en = TrainerSetting(**json.load(f))
    en_ts_config = en_meta_df.iloc[0].to_dict()
    zh_ts_config = zh_meta_df.iloc[0].to_dict()
        
    en_ts_stat = en_ts_config["stats_path"]
    en_ts_stat = np.load(en_ts_stat)

    zh_ts_stat = zh_ts_config["stats_path"]
    zh_ts_stat = np.load(zh_ts_stat)

    valid_en_ts_voxel_mask, valid_en_ts_voxel_idx = get_valid_voxels(en_ts_stat, metric=result_metric, alpha=alpha)
    valid_zh_ts_voxel_mask, valid_zh_ts_voxel_idx = get_valid_voxels(zh_ts_stat, metric=result_metric, alpha=alpha)

    valid_ts_voxel_mask = valid_en_ts_voxel_mask & valid_zh_ts_voxel_mask
    valid_ts_voxel_idx = np.intersect1d(valid_en_ts_voxel_idx, valid_zh_ts_voxel_idx)

    en_timescale = en_ts_stat[f"test_{result_metric}_selectivity_mask"]
    zh_timescale = zh_ts_stat[f"test_{result_metric}_selectivity_mask"]

    return {
        'en': en_timescale,
        'zh': zh_timescale,

        'valid_mask': valid_ts_voxel_mask,
        'valid_idx': valid_ts_voxel_idx
    }

In [36]:
def get_primal_weight(semantic_weight_data):
    en_primal_weight = semantic_weight_data['en']['config']['primal_weights_path']
    zh_primal_weight = semantic_weight_data['zh']['config']['primal_weights_path']

    en_primal_weight = np.load(en_primal_weight)    
    zh_primal_weight = np.load(zh_primal_weight)

    en_primal_weight = np.squeeze(en_primal_weight)
    zh_primal_weight = np.squeeze(zh_primal_weight)

    en_primal_weight = process_primal_weight(en_primal_weight, prediction_score=semantic_weight_data['en']['joint_pred_acc'], normalize=True)
    zh_primal_weight = process_primal_weight(zh_primal_weight, prediction_score=semantic_weight_data['zh']['joint_pred_acc'], normalize=True)

    return {
        'en': en_primal_weight,
        'zh': zh_primal_weight
    } 

In [37]:
timescales = {}
for subject_id in subject_ids:
    timescales[subject_id] = load_timescale_data(subject_id)

In [38]:
vems = {}
for subject_id in subject_ids:
    vems[subject_id] = load_semantic_weight_data(subject_id)

  en_joint_pred_acc = np.sqrt(en_joint_pred_acc)
  zh_joint_pred_acc = np.sqrt(zh_joint_pred_acc)
  en_joint_pred_acc = np.sqrt(en_joint_pred_acc)
  zh_joint_pred_acc = np.sqrt(zh_joint_pred_acc)
  en_joint_pred_acc = np.sqrt(en_joint_pred_acc)
  zh_joint_pred_acc = np.sqrt(zh_joint_pred_acc)


In [39]:
primal_weights = {}
for subject_id in subject_ids:
    primal_weights[subject_id] = get_primal_weight(vems[subject_id])

FileNotFoundError: [Errno 2] No such file or directory: '.temp/result/bling/412fb546-2565-4e3c-9d9d-a64afa4a7bc8/primal_coef.npz'

In [32]:
# loading fasttext embeddings

en_fasttext = ".temp/misc/word_list/en_fasttext_embeddings.npy"
zh_fasttext = ".temp/misc/word_list/zh_fasttext_embeddings.npy"

en_fasttext = np.load(en_fasttext, allow_pickle=True).tolist()
zh_fasttext = np.load(zh_fasttext, allow_pickle=True).tolist()

en_words = list(en_fasttext.keys())
zh_words = list(zh_fasttext.keys())

# get word from en_words and zh_words
en_words = np.array(en_words)
zh_words = np.array(zh_words)

# now get all value from en_fasttext dictionary and project that to pca
en_fasttext_values = np.array(list(en_fasttext.values()))
zh_fasttext_values = np.array(list(zh_fasttext.values()))
