In [1]:
import pandas as pd
from tqdm.notebook import tqdm_notebook as tqdm
import os
import itertools
from rsa import compute_rsa
from joblib import Parallel, delayed
import gc

In [2]:
# Generating Pairwise Combinations Between Embeddings
dir_path = f'../../data/processed/rsms/'
file_names = os.listdir(dir_path) 
file_names.remove('.DS_Store') if '.DS_Store' in file_names else None
file_pair_combs = list(itertools.combinations(file_names, 2))
len(file_pair_combs)

300

In [3]:
# Function to process each file pair
def process_file_pair(file_name_i, file_name_j, dir_path):
    print(f"{file_name_i, file_name_j}")

    # Load the RSMs
    rsm_i = pd.read_pickle(dir_path + file_name_i)
    rsm_j = pd.read_pickle(dir_path + file_name_j)

    # Compute RSA
    corr, n_words = compute_rsa(rsm_i, rsm_j)

    # Extract names
    name_i, name_j = file_name_i[:-4], file_name_j[:-4]
    print(f"spearman_r={corr}, n_words={n_words}")
    print('-------------------------------------------------')
    
    # Free memory by deleting the RSMs
    del rsm_i, rsm_j
    gc.collect()

    return [name_i, name_j, corr, n_words]

# Make the output directory
output_dir = '../../data/final/'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Compute RSA in parallel
rsa = Parallel(n_jobs=2)(
    delayed(process_file_pair)(file_name_i, file_name_j, dir_path)
    for file_name_i, file_name_j in tqdm(file_pair_combs)
)

# Save the results
rsa_df = pd.DataFrame(rsa, columns=['name_i', 'name_j', 'spearman', 'n_words'])
rsa_df.to_csv(f'../../data/final/rsa.csv', index=False)

  0%|          | 0/300 [00:00<?, ?it/s]

('eye_tracking.pkl', 'SVD_sim_rel.pkl')
spearman_r=0.003103793274149264, n_words=2121
-------------------------------------------------
('eye_tracking.pkl', 'fastText_Wiki_News.pkl')
spearman_r=0.10598061680777522, n_words=7366
-------------------------------------------------
('eye_tracking.pkl', 'SGSoftMaxOutput_SWOW.pkl')
spearman_r=-0.05545045296226891, n_words=5690
-------------------------------------------------
('eye_tracking.pkl', 'GloVe_Wikipedia.pkl')
spearman_r=0.11313880865148941, n_words=7327
-------------------------------------------------
('eye_tracking.pkl', 'fMRI_speech_hyper_align.pkl')
spearman_r=0.023718973070094133, n_words=501
-------------------------------------------------
('eye_tracking.pkl', 'PPMI_SVD_SOUTH_FLORIDA.pkl')
spearman_r=0.012406904995839331, n_words=2359
-------------------------------------------------
('eye_tracking.pkl', 'THINGS.pkl')
spearman_r=0.011341327699556578, n_words=270
-------------------------------------------------
('eye_tracking

python(3575) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


('eye_tracking.pkl', 'SGSoftMaxInput_SWOW.pkl')
spearman_r=0.03083141118461141, n_words=4041
-------------------------------------------------
('eye_tracking.pkl', 'fastTextSub_OpenSub.pkl')
spearman_r=0.03822848270698151, n_words=7300
-------------------------------------------------
('eye_tracking.pkl', 'feature_overlap.pkl')
spearman_r=0.11932173512670685, n_words=2038
-------------------------------------------------
('eye_tracking.pkl', 'PPMI_SVD_SWOW.pkl')
spearman_r=0.04332854029824027, n_words=4041
-------------------------------------------------
('eye_tracking.pkl', 'GloVe_Twitter.pkl')
spearman_r=0.12255021400591048, n_words=7056
-------------------------------------------------
('eye_tracking.pkl', 'GloVe_CommonCrawl.pkl')
spearman_r=0.14412317500888597, n_words=7371
-------------------------------------------------
('eye_tracking.pkl', 'EEG_speech.pkl')
spearman_r=0.04622254601370687, n_words=1101
-------------------------------------------------
('eye_tracking.pkl', 'morp

python(3619) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


('eye_tracking.pkl', 'EEG_text.pkl')
spearman_r=0.09701772722473243, n_words=3355
-------------------------------------------------
('eye_tracking.pkl', 'LexVec_CommonCrawl.pkl')
spearman_r=0.09022333337482821, n_words=7370
-------------------------------------------------
('SVD_sim_rel.pkl', 'SGSoftMaxInput_SWOW.pkl')
spearman_r=0.021441354014303773, n_words=4044
-------------------------------------------------
('SVD_sim_rel.pkl', 'fastText_Wiki_News.pkl')
spearman_r=0.017060738060089665, n_words=5977
-------------------------------------------------
('SVD_sim_rel.pkl', 'fastTextSub_OpenSub.pkl')
spearman_r=0.015130515758309973, n_words=5816
-------------------------------------------------
('SVD_sim_rel.pkl', 'GloVe_Wikipedia.pkl')
spearman_r=0.013611113382937731, n_words=5833
-------------------------------------------------
('SVD_sim_rel.pkl', 'fMRI_speech_hyper_align.pkl')
spearman_r=0.0067625628475851485, n_words=235
-------------------------------------------------
('SVD_sim_re

python(3701) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
exception calling callback for <Future at 0x7fa0b2f1d720 state=finished raised TerminatedWorkerError>
Traceback (most recent call last):
  File "/Users/zhussain/opt/anaconda3/envs/PsychEmbeddings/lib/python3.10/site-packages/joblib/externals/loky/_base.py", line 625, in _invoke_callbacks
    callback(self)
  File "/Users/zhussain/opt/anaconda3/envs/PsychEmbeddings/lib/python3.10/site-packages/joblib/parallel.py", line 360, in __call__
    self.parallel.dispatch_next()
  File "/Users/zhussain/opt/anaconda3/envs/PsychEmbeddings/lib/python3.10/site-packages/joblib/parallel.py", line 797, in dispatch_next
    if not self.dispatch_one_batch(self._original_iterator):
  File "/Users/zhussain/opt/anaconda3/envs/PsychEmbeddings/lib/python3.10/site-packages/joblib/parallel.py", line 864, in dispatch_one_batch
    self._dispatch(tasks)
  File "/Users/zhussain/opt/anaconda3/envs/PsychEmbeddings/lib/pyt

TerminatedWorkerError: A worker process managed by the executor was unexpectedly terminated. This could be caused by a segmentation fault while calling the function or by an excessive memory usage causing the Operating System to kill the worker.

The exit codes of the workers are {SIGKILL(-9)}