In [2]:
import pandas as pd
from tqdm.notebook import tqdm_notebook as tqdm
import os
import itertools
from rsa import compute_rsa

In [3]:
# Generate every unordered pair of RSM files to compare.
# The trailing slash is kept so that plain string concatenation with a file
# name still yields a valid path.
dir_path = '../../data/processed/rsms/'

# os.listdir returns entries in arbitrary order and will also list stray
# non-RSM entries (e.g. .ipynb_checkpoints). Keep only the pickled RSMs and
# sort them so the pair order is reproducible across runs and machines.
file_names = sorted(
    entry for entry in os.listdir(dir_path) if entry.endswith('.pkl')
)

# combinations() yields each unordered pair exactly once (no self-pairs).
file_pair_combs = list(itertools.combinations(file_names, 2))
len(file_pair_combs)

300

In [4]:
# Make the output directory (no-op when it already exists).
output_dir = '../../data/final/'
os.makedirs(output_dir, exist_ok=True)

# Compute RSA for every pair of RSM files.
# Rows are collected in a plain list and converted to a DataFrame once at the
# end; the list gets its own name so `rsa` only ever refers to the final table.
rsa_records = []
for file_name_i, file_name_j in tqdm(file_pair_combs):
    print(f"{file_name_i, file_name_j}")

    # Load the RSMs.
    # NOTE(review): unpickling can execute arbitrary code -- only run this on
    # RSM files produced by a trusted pipeline.
    rsm_i = pd.read_pickle(os.path.join(dir_path, file_name_i))
    rsm_j = pd.read_pickle(os.path.join(dir_path, file_name_j))

    # Compute RSA. compute_rsa comes from the project's `rsa` module; its
    # result is unpacked here as (correlation, number of words).
    # NOTE(review): presumably a Spearman correlation over the words shared by
    # both RSMs -- confirm against rsa.compute_rsa.
    corr, n_words = compute_rsa(rsm_i, rsm_j)

    # Save the results, labelling each RSM by its file name without extension
    # (splitext instead of a fixed [:-4] slice, so any extension length works).
    name_i = os.path.splitext(file_name_i)[0]
    name_j = os.path.splitext(file_name_j)[0]
    print(f"spearman_r={corr}, n_words={n_words}")
    print('-------------------------------------------------')
    rsa_records.append([name_i, name_j, corr, n_words])

rsa = pd.DataFrame(rsa_records, columns=['name_i', 'name_j', 'spearman_local', 'n_words'])
# Write into output_dir (created above) rather than a second hard-coded path.
rsa.to_csv(os.path.join(output_dir, 'rsa.csv'), index=False)
rsa

  0%|          | 0/300 [00:00<?, ?it/s]

('eye_tracking.pkl', 'tungsten_microarray.pkl', 417)
spearman_r=0.045883265795570664
-------------------------------------------------
('SVD_sim_rel.pkl', 'tungsten_microarray.pkl', 276)
spearman_r=0.01239717804845534
-------------------------------------------------
('SGSoftMaxInput_SWOW.pkl', 'tungsten_microarray.pkl', 490)
spearman_r=0.04599839479414725
-------------------------------------------------
('PPMI_SVD_EAT.pkl', 'tungsten_microarray.pkl', 452)
spearman_r=0.06099406580786208
-------------------------------------------------
('fastText_Wiki_News.pkl', 'tungsten_microarray.pkl', 602)
spearman_r=0.12373625620001892
-------------------------------------------------
('fastTextSub_OpenSub.pkl', 'tungsten_microarray.pkl', 590)
spearman_r=0.062156204375732875
-------------------------------------------------
('feature_overlap.pkl', 'tungsten_microarray.pkl', 346)
spearman_r=0.16556617350367817
-------------------------------------------------
('SGSoftMaxOutput_SWOW.pkl', 'tungsten