In [42]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [43]:
%reload_ext autoreload

In [44]:
import os

# move to project root
while True:
    # get list of directories
    dirs = os.listdir()
    if "README.md" in dirs:
        break
    else:
        os.chdir("..")
print(os.getcwd())

/mnt/antares_raid/home/bramantyos/codes/multilang_timescale


In [45]:
import numpy as np

import cortex

import matplotlib.pyplot as plt
import seaborn as sns

from src.trainer import Trainer
from src.settings import ResultSetting

from src.utils import put_values_on_mask

from src.utils import read_result_meta, get_surface_dict, delete_empty_result
from src.plot_utils import (
    plot_timescale_flatmap_from_volume,
    plot_volume_rgb,
    plot_joint_result,
    plot_density,
)

In [46]:
from src.configurations import config_plotting

config_plotting("paper")

In [47]:
surfaces_json = ".temp/fmri/bling/surfaces.json"
surfaces_dir = "/mnt/raid/bling/share/pycortex_store"

result_metric = "r2"
result_meta_dir = ".temp/result_meta/bling"

subjects_ids = ['COL', 'GFW', 'TYE']

alpha = 0.05
alpha_pred_acc = None

In [55]:
timescale_data = {}

for subject_id in subjects_ids:
    surface_dict = get_surface_dict(subject_id)

    subject_file_en = f".temp/config/bling/subject/{subject_id}_en.json"
    subject_file_zh = f".temp/config/bling/subject/{subject_id}_zh.json"

    trainer_en_file = (
        f".temp/config/bling/train/stepwise/{subject_id.lower()}_en_timescale.json"
    )
    trainer_zh_file = (
        f".temp/config/bling/train/stepwise/{subject_id.lower()}_zh_timescale.json"
    )
        
    feature_file_en = f".temp/config/bling/feature/{subject_id}/mBERT_all_untrimmed_timescale_stepwise_en.json"
    feature_file_zh = f".temp/config/bling/feature/{subject_id}/mBERT_all_untrimmed_timescale_stepwise_zh.json"

    en_meta_df = read_result_meta(
        result_meta_dir,
        trainer_setting_path=trainer_en_file,
        subject_setting_path=subject_file_en,
        feature_setting_path=feature_file_en,
    )
    zh_meta_df = read_result_meta(
        result_meta_dir,
        trainer_setting_path=trainer_zh_file,
        subject_setting_path=subject_file_zh,
        feature_setting_path=feature_file_zh,
    )

    en_config = en_meta_df.iloc[0].to_dict()
    zh_config = zh_meta_df.iloc[0].to_dict()

    en_stats = np.load(en_config["stats_path"])
    zh_stats = np.load(zh_config["stats_path"])
    
    keyword = f"test_{result_metric}_selectivity_mask"
    p_val_keyword = f"test_p_values_{result_metric}_mask"
    
    timescale_en, valid_en_timescale_voxel = put_values_on_mask(
        en_stats[keyword],
        zh_stats[p_val_keyword],
        ev_mask=None,
        alpha=alpha,
        valid_range=(8, 256),
    )
    
    timescale_zh, valid_zh_timescale_voxel = put_values_on_mask(
        zh_stats[keyword],
        zh_stats[p_val_keyword],
        ev_mask=None,
        alpha=alpha,
        valid_range=(8, 256),
    )
    
    shared_voxel = np.intersect1d(valid_en_timescale_voxel, valid_zh_timescale_voxel)
    
    # drop nan
    timescale_data[subject_id] = {
        "en": timescale_en[shared_voxel],
        "zh": timescale_zh[shared_voxel],
    }
    

In [56]:
# permutation test to compare the two distributions
from scipy.stats import ks_2samp

for i, subject_id in enumerate(subjects_ids):
    ks_stat, ks_p_val = ks_2samp(
        timescale_data[subject_id]["en"],
        timescale_data[subject_id]["zh"],
    )
    
    print(f"{subject_id} KS test p-value: {ks_p_val}")

COL KS test p-value: 1.1260632356052097e-18
GFW KS test p-value: 3.459127048527685e-32
TYE KS test p-value: 1.7281630060117056e-34
