# RSA

This notebook computes representational similarity analysis (RSA) scores between model representations and two targets: Exp48 experiential features and fMRI data. Spearman correlations are computed between model representations and fMRI responses aggregated across participants.

In [2]:
import pickle
import pandas as pd
from src.paths import ROOT
from scipy.stats import spearmanr
from scipy.spatial.distance import pdist, squareform
from src.fmri_rsa_utils import *
from src.utils import *
import numpy as np

In [3]:
# Read the previously saved RSA result files; later cells fill in the
# 'set2' entries of these nested dicts and the last cell writes them back.
brain_results_path = ROOT / 'results/rsa/rsa_brain_set2.json'
exp48_results_path = ROOT / 'results/rsa/rsa_exp48_set2.json'

brain_results = open_json(brain_results_path)
exp48_results = open_json(exp48_results_path)

In [4]:
# Build the RDM of the Exp48 experiential features for the Study 2 words.

# Experiential feature table: the first column is used as the word key and
# the remaining columns as that word's feature values.
exp_ftrs = pd.read_csv(ROOT / 'data/exp_features.csv')

# Map every word to its feature vector.
exp_dict = {}
for row_idx in range(len(exp_ftrs)):
    exp_dict[exp_ftrs.iloc[row_idx, 0]] = np.array(exp_ftrs.iloc[row_idx, 1:].tolist())

# The Study 2 similarity-matrix file is read here only for its `Word` column,
# which defines the word list (and its order) used throughout the notebook.
s2_exp = pd.read_csv(ROOT / 'data/Study2_model_RSMs/Exp48_SOE320_sim_mat.csv')
words_s2 = s2_exp['Word'].tolist()

# One row of features per Study 2 word, stacked in `words_s2` order.
s2_exp_mat = np.stack([exp_dict[study2_word] for study2_word in words_s2])

# Turn the feature matrix into an RDM with the project helper.
# NOTE(review): presumably pairwise (1 - cosine similarity) distances returned
# as a flat vector -- confirm against get_distance_vec in src.
exp_rdm2 = get_distance_vec(s2_exp_mat)

In [5]:
# Load the Study 2 fMRI RDM through the project helper; `brain_2` is the brain
# target that every model RDM is compared against in the cells below.
# NOTE(review): per the helper's name the data are aggregated across
# participants; 'semantic' presumably selects a region/network -- confirm in src.
brain_2 = get_fmri_rdm_study2_aggregated('semantic')

## Isolated words

Here, we consider embeddings obtained by feeding isolated words to the models.

In [6]:
# Load the isolated-word embeddings, one pickle file per model.
# NOTE(review): pickle.load can execute arbitrary code while deserialising --
# only load files that were produced by this project.
isolated_pickles = (
    'results/simcse/simcse_layers.pkl',
    'results/mcse/mcse_bert_coco_layers.pkl',
    'results/clap/clap_layers.pkl',
    'results/clip/clip.pkl',
    'results/bert/bert_layers.pkl',
    'results/visualbert/visualbert_layers.pkl',
)

isolated_embs = []
for rel_path in isolated_pickles:
    # The context manager closes each file handle as soon as it has been read.
    with open(ROOT / rel_path, 'rb') as pkl_file:
        isolated_embs.append(pickle.load(pkl_file))

# Unpack in the same order as `isolated_pickles`.
simcse, mcse, clap, clip, bert, vbert = isolated_embs

In [7]:
# Isolated-word embeddings keyed by the model name used in the results dicts.
isolated_models = {
    'simcse': simcse,
    'mcse': mcse,
    'clap': clap,
    'bert': bert,
    'visualbert': vbert,
    'clip': clip,
}

for model_name, model_embs in isolated_models.items():
    # Score the model against both targets before storing anything.
    brain_alignment = compute_layer_alignment_w_brain_aggregated(model_embs, brain_2, words_s2)
    exp48_alignment = compute_layer_alignment_w_exp48(model_embs, exp_rdm2, words_s2)

    # Store under the 'non-contextualised' condition for word set 2.
    brain_results[model_name]['non-contextualised']['set2'] = brain_alignment
    exp48_results[model_name]['non-contextualised']['set2'] = exp48_alignment

Computing additional baselines with GloVe and Word2vec

In [None]:
# GloVe baseline: Spearman correlation of the GloVe RDM with the Exp48 and brain RDMs.
glove2 = pd.read_csv(ROOT / 'data/Study2_model_RSMs/GloVe_SOE320_sim_mat.csv')

# Skip the first column (presumably the word labels -- TODO confirm) and turn
# the remaining similarity values into dissimilarities (1 - similarity).
glove2_dist = np.array(1 - glove2.iloc[:, 1:])

# Keep the strict upper triangle (k=1 drops the diagonal). The matrix size is
# taken from the data instead of being hard-coded to 320 items.
n_items = glove2_dist.shape[0]
glove2_rdm = glove2_dist[np.triu_indices(n_items, k=1)]

rho_exp_2, pval_exp_2 = spearmanr(exp_rdm2, glove2_rdm)
print(f"\nStudy 2 corr w/ Exp48: corr={round(rho_exp_2, 3)} p-val={round(pval_exp_2, 3)}")

rho_brain_2, pval_brain_2 = spearmanr(brain_2, glove2_rdm)
print(f"Study 2 corr w/ Brain: corr={round(rho_brain_2, 3)} p-val={round(pval_brain_2, 3)}")

In [None]:
# word2vec baseline: Spearman correlation of the word2vec RDM with the Exp48 and brain RDMs.
w2v2 = pd.read_csv(ROOT / 'data/Study2_model_RSMs/word2vec_SOE320_sim_mat.csv')

# Skip the first column (presumably the word labels -- TODO confirm) and turn
# the remaining similarity values into dissimilarities (1 - similarity).
w2v2_dist = np.array(1 - w2v2.iloc[:, 1:])

# Keep the strict upper triangle (k=1 drops the diagonal). The matrix size is
# taken from the data instead of being hard-coded to 320 items.
n_items = w2v2_dist.shape[0]
w2v2_rdm = w2v2_dist[np.triu_indices(n_items, k=1)]

rho_exp_2, pval_exp_2 = spearmanr(exp_rdm2, w2v2_rdm)
print(f"\nStudy 2 corr w/ Exp48: corr={round(rho_exp_2, 3)} p-val={round(pval_exp_2, 3)}")

rho_brain_2, pval_brain_2 = spearmanr(brain_2, w2v2_rdm)
print(f"Study 2 corr w/ Brain: corr={round(rho_brain_2, 3)} p-val={round(pval_brain_2, 3)}")

## Contextualised embeddings

Here, we consider embeddings obtained from nouns plugged into neutral sentences.

In [11]:
# Load the embeddings obtained with neutral-sentence context, one pickle file per model.
# NOTE(review): pickle.load can execute arbitrary code while deserialising --
# only load files that were produced by this project.
context_pickles = (
    'results/simcse/simcse_layers_context.pkl',
    'results/mcse/mcse_layers_context.pkl',
    'results/clap/clap_layers_context.pkl',
    'results/bert/bert_layers_context.pkl',
    'results/visualbert/visualbert_layers_context.pkl',
    'results/clip/clip_layers_context.pkl',
)

context_embs = []
for rel_path in context_pickles:
    # The context manager closes each file handle as soon as it has been read.
    with open(ROOT / rel_path, 'rb') as pkl_file:
        context_embs.append(pickle.load(pkl_file))

# Unpack in the same order as `context_pickles`.
simcse_cont, mcse_cont, clap_cont, bert_cont, vbert_cont, clip_cont = context_embs


In [12]:
# Neutral-sentence embeddings keyed by the model name used in the results dicts.
context_models = {
    'simcse': simcse_cont,
    'mcse': mcse_cont,
    'clap': clap_cont,
    'bert': bert_cont,
    'visualbert': vbert_cont,
    'clip': clip_cont,
}

for model_name, model_embs in context_models.items():
    # Collapse the per-prompt representations into one averaged set first.
    avg_mat = average_representations_across_prompts(model_embs)

    # Score the averaged representations against both targets.
    brain_alignment = compute_layer_alignment_w_brain_aggregated(avg_mat, brain_2, words_s2)
    exp48_alignment = compute_layer_alignment_w_exp48(avg_mat, exp_rdm2, words_s2)

    # Store under the 'context' condition for word set 2.
    brain_results[model_name]['context']['set2'] = brain_alignment
    exp48_results[model_name]['context']['set2'] = exp48_alignment

## Contextualised embeddings: caption-like templates

Here, we consider embeddings obtained from nouns plugged into caption-like sentences.

In [13]:
# Load the embeddings obtained with caption-like context, one pickle file per model.
# NOTE(review): pickle.load can execute arbitrary code while deserialising --
# only load files that were produced by this project.
visual_context_pickles = (
    'results/simcse/simcse_layers_visual_context_specific.pkl',
    'results/mcse/mcse_layers_visual_context_specific.pkl',
    'results/clap/clap_layers_visual_context_specific.pkl',
    'results/bert/bert_layers_visual_context_specific.pkl',
    'results/visualbert/visualbert_layers_visual_context_specific.pkl',
    'results/clip/clip_layers_visual_context_specific.pkl',
)

visual_context_embs = []
for rel_path in visual_context_pickles:
    # The context manager closes each file handle as soon as it has been read.
    with open(ROOT / rel_path, 'rb') as pkl_file:
        visual_context_embs.append(pickle.load(pkl_file))

# Unpack in the same order as `visual_context_pickles`.
(simcse_cont_vis, mcse_cont_vis, clap_cont_vis,
 bert_cont_vis, vbert_cont_vis, clip_cont_vis) = visual_context_embs


In [14]:
# Caption-like-context embeddings keyed by the model name used in the results dicts.
visual_context_models = {
    'simcse': simcse_cont_vis,
    'mcse': mcse_cont_vis,
    'clap': clap_cont_vis,
    'bert': bert_cont_vis,
    'visualbert': vbert_cont_vis,
    'clip': clip_cont_vis,
}

for model_name, model_embs in visual_context_models.items():
    # Collapse the per-prompt representations into one averaged set first.
    avg_mat = average_representations_across_prompts(model_embs)

    # Score the averaged representations against both targets.
    brain_alignment = compute_layer_alignment_w_brain_aggregated(avg_mat, brain_2, words_s2)
    exp48_alignment = compute_layer_alignment_w_exp48(avg_mat, exp_rdm2, words_s2)

    # Store under the 'visual context' condition for word set 2.
    brain_results[model_name]['visual context']['set2'] = brain_alignment
    exp48_results[model_name]['visual context']['set2'] = exp48_alignment

In [12]:
# Write both updated result dicts back to the JSON files they were loaded from.
rsa_outputs = (
    (brain_results, 'results/rsa/rsa_brain_set2.json'),
    (exp48_results, 'results/rsa/rsa_exp48_set2.json'),
)
for rsa_scores, out_rel_path in rsa_outputs:
    dict_to_json(rsa_scores, ROOT / out_rel_path)