In [1]:
import pandas as pd
from tqdm.notebook import tqdm_notebook as tqdm
import os
import itertools
from rsa import compute_rsa
from joblib import Parallel, delayed

In [2]:
# Generating Pairwise Combinations Between Embeddings
dir_path = '../../data/processed/rsms/'
# os.listdir returns entries in arbitrary order; sorting makes the pair order
# (and the row order of anything built from it) reproducible across runs.
file_names = sorted(os.listdir(dir_path))
# Every unordered pair of distinct files, each pair appearing exactly once.
file_pair_combs = list(itertools.combinations(file_names, 2))
len(file_pair_combs)

300

In [3]:
# Function to process each file pair
def process_file_pair(file_name_i, file_name_j, dir_path):
    """Load two pickled RSMs from ``dir_path`` and compute their RSA score.

    Args:
        file_name_i: File name (not a full path) of the first pickled RSM.
        file_name_j: File name (not a full path) of the second pickled RSM.
        dir_path: Directory containing both files; a trailing path separator
            is optional.

    Returns:
        list: ``[name_i, name_j, corr, n_words]`` where the names are the
        file names with their extension removed, and ``corr`` / ``n_words``
        are the two values returned by ``compute_rsa`` (logged below as
        ``spearman_r`` and ``n_words``).
    """
    print(f"{file_name_i, file_name_j}")

    # Load the RSMs. os.path.join works whether or not dir_path ends with a
    # separator, unlike plain string concatenation.
    # NOTE(review): unpickling can execute arbitrary code -- only use this on
    # trusted files.
    rsm_i = pd.read_pickle(os.path.join(dir_path, file_name_i))
    rsm_j = pd.read_pickle(os.path.join(dir_path, file_name_j))

    # Compute RSA
    corr, n_words = compute_rsa(rsm_i, rsm_j)

    # Extract names: strip the real extension instead of assuming it is
    # exactly four characters long (e.g. ".pkl").
    name_i = os.path.splitext(file_name_i)[0]
    name_j = os.path.splitext(file_name_j)[0]
    print(f"spearman_r={corr}, n_words={n_words}")
    print('-------------------------------------------------')

    return [name_i, name_j, corr, n_words]

# Make the output directory. exist_ok=True is a no-op when it already exists
# and avoids the check-then-create race of a separate os.path.exists test.
output_dir = '../../data/final/'
os.makedirs(output_dir, exist_ok=True)

# Compute RSA in parallel with 3 worker jobs.
# tqdm wraps the task generator, so the bar advances as joblib consumes tasks
# from it, which is not necessarily the moment each task finishes.
rsa = Parallel(n_jobs=3)(
    delayed(process_file_pair)(file_name_i, file_name_j, dir_path)
    for file_name_i, file_name_j in tqdm(file_pair_combs)
)

# Save the results: one row per file pair, written inside output_dir so the
# directory created above and the CSV location cannot drift apart.
rsa_df = pd.DataFrame(rsa, columns=['name_i', 'name_j', 'spearman_local', 'n_words'])
rsa_df.to_csv(os.path.join(output_dir, 'rsa.csv'), index=False)

  0%|          | 0/300 [00:00<?, ?it/s]

KeyboardInterrupt: 