# Partial Correlations

This notebook contains code to reproduce the partial correlation analysis.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from src.paths import ROOT
from scipy.stats import spearmanr
from src.utils import *
from src.fmri_rsa_utils import *
# from src.dim_red_utils import *
import pickle
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

In [2]:
#Creating RDMs for Exp features

# experiential features
exp_ftrs = pd.read_csv(ROOT / 'data/exp_features.csv')

# organising exp arrays into a dict
exp_dict = {exp_ftrs.iloc[i,0]: np.array(exp_ftrs.iloc[i,1:].tolist()) for i in range(len(exp_ftrs))}

# distance matrices (1-cosine_similarity)
s2_exp = pd.read_csv(ROOT / 'data/Study2_model_RSMs/Exp48_SOE320_sim_mat.csv')
words_s2 = s2_exp.Word.tolist()

# organising words from the 2 studies in separate matrices

s2_exp_mat = np.stack([exp_dict[word] for word in words_s2])
s2_exp_mat.shape

# obtaining RDMs
exp_rdm2 = get_distance_vec(s2_exp_mat)

In [3]:
# importing embeddings
brain2 = pd.read_csv(ROOT / 'results/rsa/brain_2_contextualised.csv')
simcse_cont = pickle.load(open(ROOT / 'results/simcse/simcse_layers_context.pkl', 'rb'))
mcse_cont = pickle.load(open(ROOT / 'results/mcse/mcse_layers_context.pkl', 'rb'))
bert_cont = pickle.load(open(ROOT / 'results/bert/bert_layers_context.pkl', 'rb'))
vbert_cont = pickle.load(open(ROOT / 'results/visualbert/visualbert_layers_context.pkl', 'rb'))
clap_cont = pickle.load(open(ROOT / 'results/clap/clap_layers_context.pkl', 'rb'))

In [4]:
# importing best layers
best_layers = open_json(ROOT / 'results/rsa/best_layers.json')

# getting rdms
simcse_avg = average_representations_across_prompts(simcse_cont)
mcse_avg = average_representations_across_prompts(mcse_cont)
clap_avg = average_representations_across_prompts(clap_cont)
bert_avg = average_representations_across_prompts(bert_cont)
vbert_avg = average_representations_across_prompts(vbert_cont)

simcse_rdm = get_distance_vec(np.stack([simcse_avg[word][np.array(best_layers['simcse']['brain']), :].mean(axis=0) for word in words_s2]))
mcse_rdm = get_distance_vec(np.stack([mcse_avg[word][np.array(best_layers['mcse']['brain']), :].mean(axis=0) for word in words_s2]))
clap_rdm = get_distance_vec(np.stack([clap_avg[word][np.array(best_layers['clap']['brain']), :].mean(axis=0) for word in words_s2]))
bert_rdm = get_distance_vec(np.stack([bert_avg[word][np.array(best_layers['bert']['brain']), :].mean(axis=0) for word in words_s2]))
vbert_rdm = get_distance_vec(np.stack([vbert_avg[word][np.array(best_layers['visualbert']['brain']), :].mean(axis=0) for word in words_s2]))

# loading brain RDM
brain_data_2 = get_fmri_rdm_study2_aggregated('semantic')

In [5]:
def regress_model(model_to_remove, target_model, brain_data):

    lm = LinearRegression().fit(model_to_remove.reshape(-1, 1), target_model)
    residuals = target_model - lm.predict(model_to_remove.reshape(-1, 1))
    rho, pval = spearmanr(residuals, brain_data)
    return rho, pval



In [6]:
models_wo_exp48 = {}
# Regressing exp48 out of mcse

rho, pval = regress_model(model_to_remove=exp_rdm2, target_model=mcse_rdm, brain_data=brain_data_2)
print(f"Independent contribution of MCSE to fMRI prediction without Exp48: rho={round(rho, 3)}, p={round(pval,3)}")
models_wo_exp48['mcse'] = rho

# Regressing exp48 out of simcse
rho, pval = regress_model(model_to_remove=exp_rdm2, target_model=simcse_rdm, brain_data=brain_data_2)
print(f"Independent contribution of SimCSE to fMRI prediction without Exp48: rho={round(rho, 3)}, p={round(pval,3)}")
models_wo_exp48['simcse'] = rho

# Regressing exp48 out of clap
rho, pval = regress_model(model_to_remove=exp_rdm2, target_model=clap_rdm, brain_data=brain_data_2)
print(f"Independent contribution of CLAP to fMRI prediction without Exp48: rho={round(rho, 3)}, p={round(pval,3)}")
models_wo_exp48['clap'] = rho

# Regressing exp48 out of bert
rho, pval = regress_model(model_to_remove=exp_rdm2, target_model=bert_rdm, brain_data=brain_data_2)
print(f"Independent contribution of BERT to fMRI prediction without Exp48: rho={round(rho, 3)}, p={round(pval,3)}")
models_wo_exp48['bert'] = rho

# Regressing exp48 out of visualbert
rho, pval = regress_model(model_to_remove=exp_rdm2, target_model=vbert_rdm, brain_data=brain_data_2)
print(f"Independent contribution of VisualBERT to fMRI prediction without Exp48: rho={round(rho, 3)}, p={round(pval,3)}")
models_wo_exp48['visualbert'] = rho

Independent contribution of MCSE to fMRI prediction without Exp48: rho=0.064, p=0.0
Independent contribution of SimCSE to fMRI prediction without Exp48: rho=0.096, p=0.0
Independent contribution of CLAP to fMRI prediction without Exp48: rho=-0.005, p=0.231
Independent contribution of BERT to fMRI prediction without Exp48: rho=0.104, p=0.0
Independent contribution of VisualBERT to fMRI prediction without Exp48: rho=0.061, p=0.0


In [7]:
exp48_wo_models = {}
# Regressing mcse out of exp48

rho, pval = regress_model(model_to_remove=mcse_rdm, target_model=exp_rdm2, brain_data=brain_data_2)
print(f"Independent contribution of Exp48 to fMRI prediction without MCSE: rho={round(rho, 3)}, p={round(pval,3)}")
exp48_wo_models['mcse'] = rho

# Regressing simcse out of exp48
rho, pval = regress_model(model_to_remove=simcse_rdm, target_model=exp_rdm2, brain_data=brain_data_2)
print(f"Independent contribution of Exp48 to fMRI prediction without SimCSE: rho={round(rho, 3)}, p={round(pval,3)}")
exp48_wo_models['simcse'] = rho

# Regressing clap out of exp48
rho, pval = regress_model(model_to_remove=clap_rdm, target_model=exp_rdm2, brain_data=brain_data_2)
print(f"Independent contribution of Exp48 to fMRI prediction without CLAP: rho={round(rho, 3)}, p={round(pval,3)}")
exp48_wo_models['clap'] = rho

# Regressing bert out of exp48
rho, pval = regress_model(model_to_remove=bert_rdm, target_model=exp_rdm2, brain_data=brain_data_2)
print(f"Independent contribution ofExp48 to fMRI prediction without BERT: rho={round(rho, 3)}, p={round(pval,3)}")
exp48_wo_models['bert'] = rho

# Regressing visualbert out of exp48
rho, pval = regress_model(model_to_remove=vbert_rdm, target_model=exp_rdm2, brain_data=brain_data_2)
print(f"Independent contribution of Exp48 to fMRI prediction without VisualBERT: rho={round(rho, 3)}, p={round(pval,3)}")
exp48_wo_models['visualbert'] = rho

Independent contribution of Exp48 to fMRI prediction without MCSE: rho=0.214, p=0.0
Independent contribution of Exp48 to fMRI prediction without SimCSE: rho=0.19, p=0.0
Independent contribution of Exp48 to fMRI prediction without CLAP: rho=0.273, p=0.0
Independent contribution ofExp48 to fMRI prediction without BERT: rho=0.183, p=0.0
Independent contribution of Exp48 to fMRI prediction without VisualBERT: rho=0.259, p=0.0


In [8]:
# VL models without LM counterpart

# Regressing bert out of visualbert
rho, pval = regress_model(model_to_remove=bert_rdm, target_model=vbert_rdm, brain_data=brain_data_2)
print(f"Independent contribution of VisualBERT to fMRI prediction without BERT: rho={round(rho, 2)}, p={round(pval,3)}")


# Regressing SimCSE out of MCSE
rho, pval = regress_model(model_to_remove=simcse_rdm, target_model=mcse_rdm, brain_data=brain_data_2)
print(f"Independent contribution of MCSE to fMRI prediction without SimCSE: rho={round(rho, 2)}, p={round(pval,3)}")

Independent contribution of VisualBERT to fMRI prediction without BERT: rho=0.0, p=0.603
Independent contribution of MCSE to fMRI prediction without SimCSE: rho=0.0, p=0.659
