In [1]:
import pandas as pd
from tqdm.notebook import tqdm_notebook as tqdm
import os
import itertools
from rsa import compute_rsa
from joblib import Parallel, delayed
import gc

In [2]:
# Generating Pairwise Combinations Between Embeddings
dir_path = f'../../data/processed/rsms/'
file_names = os.listdir(dir_path) 
file_names.remove('.DS_Store') if '.DS_Store' in file_names else None
file_pair_combs = list(itertools.combinations(file_names, 2))
len(file_pair_combs)

300

In [None]:
# Function to process each file pair
def process_file_pair(file_name_i, file_name_j, dir_path):
    print(f"{file_name_i, file_name_j}")

    # Load the RSMs
    rsm_i = pd.read_pickle(dir_path + file_name_i)
    rsm_j = pd.read_pickle(dir_path + file_name_j)

    # Compute RSA
    corr, n_words = compute_rsa(rsm_i, rsm_j)

    # Extract names
    name_i, name_j = file_name_i[:-4], file_name_j[:-4]
    print(f"spearman_r={corr}, n_words={n_words}")
    print('-------------------------------------------------')
    
    # Free memory by deleting the RSMs
    del rsm_i, rsm_j
    gc.collect()

    return [name_i, name_j, corr, n_words]

# Make the output directory
output_dir = '../../data/final/'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Compute RSA in parallel
rsa = Parallel(n_jobs=1)(
    delayed(process_file_pair)(file_name_i, file_name_j, dir_path)
    for file_name_i, file_name_j in tqdm(file_pair_combs)
)

# Save the results
rsa_df = pd.DataFrame(rsa, columns=['name_i', 'name_j', 'spearman', 'n_words'])
rsa_df.to_csv(f'../../data/final/rsa.csv', index=False)

  0%|          | 0/300 [00:00<?, ?it/s]

('compo_attribs.pkl', 'SVD_sim_rel.pkl')
spearman_r=0.08862861920108459, n_words=354
-------------------------------------------------
('compo_attribs.pkl', 'EEG_speech.pkl')
spearman_r=0.005647605373435004, n_words=119
-------------------------------------------------
('compo_attribs.pkl', 'microarray.pkl')
spearman_r=0.0249043021549751, n_words=99
-------------------------------------------------
('compo_attribs.pkl', 'SGSoftMaxInput_SWOW.pkl')
spearman_r=0.27500957662993364, n_words=476
-------------------------------------------------
('compo_attribs.pkl', 'PPMI_SVD_SWOW.pkl')
spearman_r=0.3368178533079305, n_words=476
-------------------------------------------------
('compo_attribs.pkl', 'GloVe_Wikipedia.pkl')
spearman_r=0.3361246767892626, n_words=532
-------------------------------------------------
('compo_attribs.pkl', 'EEG_text.pkl')
spearman_r=-0.03226181388556629, n_words=181
-------------------------------------------------
('compo_attribs.pkl', 'PPMI_SVD_SOUTH_FLORIDA.pk