# Partial correlation analysis

This notebook contains code to reproduce the ablation study performed for the sentence condition, i.e., the partial correlation analysis in which we regressed the LMs' RDMs out of the VLMs' RDMs.

In [None]:
# importing necessary libraries and modules
from src.paths import ROOT
from src.utils import open_json
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from scipy.stats import spearmanr
from src.best_layers import *

In [None]:
# loading the brain (fMRI-derived) RDMs; they are indexed further down as
# brain_rdms[condition][network], e.g. brain_rdms['sentence']['visual']
brain_rdms = open_json(ROOT / 'data/participant_rdms.json')

In [None]:
# function to perform partial correlation analysis

def regress_model(model_to_remove_rdm, target_model_rdm, brain_rdm):
    '''
        Regress one model RDM out of another and correlate what is left with the brain RDM.

        A linear regression predicts target_model_rdm from model_to_remove_rdm; the
        residuals of that fit are then compared with the fMRI-derived RDM (brain_rdm)
        using Spearman's rank correlation.

        Parameters:
            model_to_remove_rdm: array-like with the RDM values to regress out. It is
                converted to a NumPy array and reshaped into a single predictor
                column, so plain lists are accepted as well as arrays.
            target_model_rdm: array-like with the RDM values of the target model,
                one value per entry of model_to_remove_rdm.
            brain_rdm: array-like with the fMRI-derived RDM values that the
                residuals are correlated with.

        Returns:
            residuals: target_model_rdm minus the values predicted by the regression.
            rho: Spearman correlation between residuals and brain_rdm.
            pval: p-value associated with rho, as returned by spearmanr.
    '''

    # building the single-column design matrix once, so it is shared by fit and
    # predict; np.asarray makes list inputs work (lists have no .reshape)
    predictor = np.asarray(model_to_remove_rdm).reshape(-1, 1)

    # fitting linear regression
    lm = LinearRegression().fit(predictor, target_model_rdm)

    # obtaining residuals
    residuals = target_model_rdm - lm.predict(predictor)

    # computing partial correlation and p-value
    rho, pval = spearmanr(residuals, brain_rdm)

    return residuals, rho, pval



## Computing partial correlations

In [None]:
# loading the model RDMs for the sentence condition
rdms_sentence = open_json(ROOT / 'data/sentences_model_rdms.json')

# each VLM (key) is paired with its language-only counterpart (value);
# the insertion order below is the order in which the VLMs are processed
model_to_counterpart = {
    'visualbert': 'bert',
    'lxmert': 'bert',
    'llava': 'llama',
    'align': 'bert',
    'idefics2': 'mistral',
}

In [None]:
# creating a dict to save partial correlation results, laid out as
# partial_correlations_sentence[vlm][network] = {"initial": rho, "wo_language": rho}
partial_correlations_sentence = {}

# indices of the upper triangle with the diagonal excluded (k=1);
# the RDMs are taken to be 180 x 180, matching n = 180 in the statistical tests
triu_indices = np.triu_indices(n=180, m=180, k=1)

# iterating through vlms
for model in model_to_counterpart:
    partial_correlations_sentence[model] = {}
    lo_model = model_to_counterpart[model]

    # iterating through brain networks
    for network in ['languageLH', 'languageRH', 'visual']:

        # selecting the off-diagonal of RDM for the language-only counterpart
        # (taken at the layer listed for it in best_layers_sentence)
        counterpart_rdm = np.array(rdms_sentence[lo_model][best_layers_sentence[lo_model][network]])[triu_indices]

        # selecting the off-diagonal of brain RDM
        brain_rdm = np.array(brain_rdms['sentence'][network])[triu_indices]

        # creating model-specific dict
        partial_correlations_sentence[model][network] = {}

        # selecting the off-diagonal of vlm RDM
        model_rdm = np.array(rdms_sentence[model][best_layers_sentence[model][network]])[triu_indices]

        # computing and storing initial correlation
        rho, pval = spearmanr(model_rdm, brain_rdm)
        print(f"Initial Rho: {round(rho,2)}\tpval:{round(pval, 3)}")
        partial_correlations_sentence[model][network]["initial"] = rho

        # computing and storing partial correlation; the keyword names must match
        # the signature regress_model(model_to_remove_rdm, target_model_rdm, brain_rdm),
        # otherwise the call raises a TypeError on a fresh kernel
        residuals, rho, pval = regress_model(
            model_to_remove_rdm=counterpart_rdm,
            target_model_rdm=model_rdm,
            brain_rdm=brain_rdm,
        )
        print(f"Rho: {round(rho,3)}\tpval:{round(pval, 3)}")
        partial_correlations_sentence[model][network]["wo_language"] = rho
        
    

Initial Rho: 0.1	pval:0.0
Rho: 0.098	pval:0.0
Initial Rho: 0.06	pval:0.0
Rho: 0.043	pval:0.0
Initial Rho: 0.09	pval:0.0
Rho: 0.087	pval:0.0
Initial Rho: 0.08	pval:0.0
Rho: 0.081	pval:0.0
Initial Rho: 0.05	pval:0.0
Rho: 0.04	pval:0.0
Initial Rho: 0.08	pval:0.0
Rho: 0.068	pval:0.0
Initial Rho: 0.01	pval:0.103
Rho: 0.003	pval:0.664
Initial Rho: 0.06	pval:0.0
Rho: 0.032	pval:0.0
Initial Rho: 0.06	pval:0.0
Rho: 0.027	pval:0.001
Initial Rho: 0.02	pval:0.008
Rho: 0.02	pval:0.012
Initial Rho: -0.06	pval:0.0
Rho: -0.059	pval:0.0
Initial Rho: 0.02	pval:0.005
Rho: 0.021	pval:0.008
Initial Rho: 0.05	pval:0.0
Rho: 0.009	pval:0.265
Initial Rho: 0.08	pval:0.0
Rho: 0.001	pval:0.86
Initial Rho: 0.09	pval:0.0
Rho: 0.045	pval:0.0


### Performing statistical tests

In [None]:
from src.statistical_tests import corr_significance_test

# VLMs whose initial and partial correlations are compared
models = ['align', 'visualbert', 'lxmert', 'llava', 'idefics2']

# number of entries in the upper triangle (diagonal excluded) of an n x n RDM
n = 180
size = n * (n - 1) / 2

# significance threshold: 0.05 divided by 5
# NOTE(review): presumably a Bonferroni correction over the 5 models - confirm
corrected_alpha = 0.05 / 5

for network in ('languageLH', 'languageRH', 'visual'):

    print(network)

    for model in models:
        # correlations stored for this vlm / network pair
        network_results = partial_correlations_sentence[model][network]
        corr_1 = network_results['initial']
        corr_2 = network_results['wo_language']

        p_val = corr_significance_test(size, corr1=corr_1, corr2=corr_2)

        # only differences that are NOT significant are reported
        if p_val > corrected_alpha:
            print(f"\tThe difference between initial and partial correlations for {model} is not statistically significant")
       
    