In [1]:
import os

# move to project root
# The project root is taken to be the nearest directory, starting at the current
# working directory and walking upwards, that contains a README.md file.
start_dir = os.getcwd()
while "README.md" not in os.listdir():
    here = os.getcwd()
    # dirname() of a filesystem root is the root itself; at that point
    # chdir("..") makes no progress, so stop instead of looping forever.
    if os.path.dirname(here) == here:
        os.chdir(start_dir)  # leave the kernel where it started
        raise FileNotFoundError(
            f"No README.md found in {start_dir!r} or any of its parent directories"
        )
    os.chdir("..")
print(os.getcwd())

/mnt/antares_raid/home/bramantyos/codes/multilang_timescale


In [2]:
import json
from typing import List, Dict

In [3]:
from src.settings import TrainerConfig, SubjectConfig, FeatureConfig, ResultConfig
from src.trainer import Trainer
from src.config import timescales

In [4]:
# logger = logging.getLogger('load_data')
# logger.setLevel('INFO')

In [5]:
# Params
# These three values select the run; they are interpolated into the data,
# feature and config file names built in the cells below.
task = "reading"
subject_id = "COL"
lang_code = "en"

# Data

In [6]:
# BLING
## COL
# Folder holding the joined sensory feature files.
feature_dir = ".temp/feature/joined"

# fMRI response files for subject COL, one per stimulus language.
col_zh_data = ".temp/fmri/bling/COL/moth_reading_zh_COL.hf5"
col_en_data = ".temp/fmri/bling/COL/moth_reading_en_COL.hf5"

# Only "zh" selects the Chinese recording; any other code uses the English one.
if lang_code == "zh":
    fmri_data = col_zh_data
else:
    fmri_data = col_en_data

In [7]:
# Language-model feature archive: the zh file for "zh", the en file otherwise.
if lang_code == "zh":
    lm_feature_path = ".temp/feature/timescales_mBERT_all_zh.npz"
else:
    lm_feature_path = ".temp/feature/timescales_mBERT_all_en.npz"

# Names of the sensory features; they are joined with "-" to form part of the
# joined feature file name.
sensory_feature_list = ['numwords', 'numletters', 'moten']
joined_feature_list = "-".join(sensory_feature_list)

# Full path of the joined sensory feature file inside feature_dir.
sensory_feature_path = os.path.join(
    feature_dir,
    f"Baseline_bling_{task}_{lang_code}_{subject_id}_{joined_feature_list}.npz",
)

# Config

In [39]:

# Subject-level settings: which subject/language to fit and where its fMRI
# responses live.
# NOTE(review): sub_trim_start / sub_trim_end are presumably the number of
# samples dropped at the start/end of each run - confirm in src.settings.
sub_config = SubjectConfig(
    sub_id=subject_id,
    sub_fmri_train_test_path=fmri_data,
    lang_code=lang_code,
    sub_trim_start=10,
    sub_trim_end=12,
)



In [40]:
# Trainer settings: compute backend, output folders and batch sizes.
# NOTE(review): the exact meaning of n_targets_batch / n_targets_batch_refit is
# defined by TrainerConfig in src.settings - not visible from this notebook.
trainer_config = TrainerConfig(
    backend="torch_cuda",
    result_save_dir=".temp/result/bling",
    result_meta_save_dir=".temp/result_meta/bling",
    n_targets_batch=4096,
    n_targets_batch_refit=2048,
)

In [41]:
# Feature settings: the mBERT feature archive plus the joined sensory
# feature file whose paths were built in the feature-path cell.
feature_config = FeatureConfig(
    lm_feature_type="mBERT",
    lm_feature_path=lm_feature_path,
    join_sensory_feature_path=sensory_feature_path,
)

In [42]:
# Root folder for the generated JSON configs; the cells below write into its
# "subject", "feature" and "train" sub-folders.
config_dir = ".temp/config/bling"


In [43]:
# Write the subject configuration to <config_dir>/subject/<subject>_<lang>.json;
# the resulting path is later handed to Trainer.
sub_json = os.path.join(config_dir, "subject", f"{subject_id}_{lang_code}.json")
# open(..., "w") does not create missing parent directories, so make sure the
# target folder exists first (no-op when it is already there).
os.makedirs(os.path.dirname(sub_json), exist_ok=True)
with open(sub_json, "w") as f:
    json.dump(vars(sub_config), f, indent=4)
    

In [44]:
# Write the feature configuration to
# <config_dir>/feature/mBERT_all_timescale_<lang>.json; the resulting path is
# later handed to Trainer.
feature_json = os.path.join(config_dir, "feature", f"mBERT_all_timescale_{lang_code}.json")
# open(..., "w") does not create missing parent directories, so make sure the
# target folder exists first (no-op when it is already there).
os.makedirs(os.path.dirname(feature_json), exist_ok=True)
with open(feature_json, "w") as f:
    json.dump(vars(feature_config), f, indent=4)

In [45]:
# Write the trainer configuration to <config_dir>/train/trainer.json; the
# resulting path is later handed to Trainer.
# (The file name has no placeholders, so a plain string replaces the f-string.)
trainer_json = os.path.join(config_dir, "train", "trainer.json")
# open(..., "w") does not create missing parent directories, so make sure the
# target folder exists first (no-op when it is already there).
os.makedirs(os.path.dirname(trainer_json), exist_ok=True)
with open(trainer_json, "w") as f:
    json.dump(vars(trainer_config), f, indent=4)

# Training

In [15]:
# Build the trainer from the three JSON config files written in the Config section.
# NOTE(review): in the recorded run this call raises the ValueError shown in the
# cell output (size 3737 at index 0 vs 3651 at index 8 along dimension 0).
# The Misc section shows 3737 rows for both the mBERT '2_4_words' train features
# and the untrimmed fMRI train responses, so the array at index 8 is presumably
# the joined sensory feature file - TODO confirm and regenerate/align it.
trainer = Trainer(sub_json, feature_json, trainer_json)

ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 3737 and the array at index 8 has size 3651

In [None]:
# Run the fit. This cell has no execution count in the saved notebook, i.e. it
# was not executed in the recorded session.
trainer.train()

# Misc

In [8]:
import numpy as np


In [9]:
# Open the language-model feature archive selected by lm_feature_path.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code - only use it on feature files from a trusted source.
mbert = np.load(lm_feature_path, allow_pickle=True)

In [10]:
# Pull the entry stored under the "train" key of the archive.
mbert_train = mbert['train']

In [11]:
# Unwrap the object stored under "train" (the next cell indexes it by a string
# key, so it is presumably a pickled dict - TODO confirm).
# Reading from `mbert` directly, instead of re-assigning `mbert_train` from
# itself, keeps this cell safe to re-run: the result no longer depends on how
# many times the cell has already been executed.
mbert_train = mbert['train'].tolist()

In [12]:
# Inspect the shape of the '2_4_words' train feature matrix; its first dimension
# is the one to compare against the sizes reported in the Trainer ValueError.
mbert_train['2_4_words'].shape

(3737, 9984)

In [13]:
from src.config import train_stories, test_stories, train_stories_zh, test_stories_zh
from src.utils import load_dict, cook_responses

In [14]:
# Load the fMRI file chosen for lang_code with the project's load_dict helper.
data = load_dict(fmri_data)

In [16]:
# Split the loaded responses into train/test sets.
# The story lists follow lang_code, using the same "zh vs. everything else" rule
# as fmri_data, so the Chinese recording is not paired with the English lists.
train_data, test_data = cook_responses(
    data,
    test_runs=test_stories_zh if lang_code == "zh" else test_stories,
    train_runs=train_stories_zh if lang_code == "zh" else train_stories,
    # Trimming is left disabled here, as in the original cell, so the shapes
    # inspected below are those of the untrimmed responses.
    # trim_start_length=sub_config.sub_trim_start,
    # trim_end_length=sub_config.sub_trim_end,
    multiseries="average_across",
)


In [17]:
# Replace non-finite values in the responses (np.nan_to_num maps NaN to 0 and
# +/-inf to large finite numbers by default). Only the first element of
# test_data is kept.
# NOTE(review): this cell overwrites its own inputs and is not idempotent -
# re-running it without re-running the cook_responses cell applies [0] to the
# already-reduced test_data again.
train_data = np.nan_to_num(train_data)
test_data = np.nan_to_num(test_data[0])

In [18]:
# Inspect the shape of the cleaned train responses; the first dimension is the
# one to compare against the feature matrices.
train_data.shape

(3737, 66367)