# Statistical tests

This notebook contains code to replicate the statistical tests we conducted.

In [1]:
import numpy as np
from scipy.stats import spearmanr, pearsonr, norm
from scipy.stats import ttest_rel
from src.paths import ROOT
from src.utils import * 
import numpy as np

In [2]:
def corr_significance_test(sample_size, corr1, corr2):
    """Two-sided z-test for the difference between two correlation coefficients.

    Both coefficients are Fisher z-transformed. The absolute difference of the
    transformed values is divided by ``sqrt(1.060 / (sample_size - 3))`` and the
    resulting z-score is referred to the standard normal distribution.

    Parameters
    ----------
    sample_size : int or float
        Number of observations each correlation was computed on. Must be
        greater than 3.
    corr1, corr2 : float
        Correlation coefficients; the transform is only finite for values
        strictly inside (-1, 1).

    Returns
    -------
    float
        Two-sided p-value of the difference.

    Raises
    ------
    ValueError
        If ``sample_size`` is not greater than 3 (the standard deviation would
        be undefined).
    """
    if np.any(np.asarray(sample_size) <= 3):
        raise ValueError(
            f"sample_size must be greater than 3, got {sample_size!r}"
        )

    # Fisher transformation: arctanh(r) == 0.5 * log((1 + r) / (1 - r))
    fisher1 = np.arctanh(corr1)
    fisher2 = np.arctanh(corr2)

    # NOTE(review): 1.060 / (n - 3) is presumably the variance of a single
    # Fisher-transformed Spearman coefficient. The textbook test for two
    # independent coefficients uses twice that variance for the difference;
    # confirm that the single-coefficient value is intended here.
    expected_sd = np.sqrt(1.060 / (sample_size - 3))

    # Compute p-value. The survival function is used instead of 1 - cdf because
    # 1 - cdf rounds to exactly 0.0 for large z-scores, whereas sf keeps the
    # tail probability.
    z_score = abs(fisher1 - fisher2) / expected_sd
    p_value = 2 * norm.sf(z_score)

    return p_value


In [3]:
# Load the stored results used by the cells below, which index them as
# brain_results[model][condition][subset] to read one correlation value.
# Presumably RSA scores against brain data, judging by the file name.
brain_results = open_json(ROOT / 'results/rsa/rsa_brain_set2_top3.json')

## Objects vs. Events

In [27]:
# Objects subset: test every pair of models for a difference between their
# 'context' correlations and report the pairs that differ significantly.
n = 160
size = n * (n - 1) / 2  # number of distinct pairs among n items
models = ['simcse', 'mcse', 'bert', 'visualbert']

# Positions above the diagonal enumerate each unordered model pair exactly once
indices = np.triu_indices(n=len(models), m=len(models), k=1)

# Bonferroni correction: split the 0.05 level across all pairwise tests
corrected_pval = 0.05 / len(indices[0])

for ind1, ind2 in zip(*indices):
    corr1, corr2 = (
        brain_results[models[ind]]['context']['objects'] for ind in (ind1, ind2)
    )
    p_val = corr_significance_test(size, corr1=corr1, corr2=corr2)

    # Only pairs that remain significant after the correction are printed
    if p_val < corrected_pval:
        print(models[ind1], models[ind2], p_val)


In [4]:
# Events subset: pairwise tests between the models' 'context' correlations plus
# one hard-coded value labelled 'exp48'. Pairs whose p-value is ABOVE the
# corrected threshold (i.e. not significantly different) are printed.
n = 160
size = n * (n - 1) / 2  # number of distinct pairs among n items
models = ['simcse', 'mcse', 'bert', 'visualbert']

corrs = [brain_results[name]['context']['events'] for name in models]
corrs.append(0.236)  # hard-coded value compared under the label 'exp48'
models = [*models, 'exp48']

# Positions above the diagonal enumerate each unordered pair exactly once
indices = np.triu_indices(n=len(models), m=len(models), k=1)

# Bonferroni correction: split the 0.05 level across all pairwise tests
corrected_pval = 0.05 / len(indices[0])

for ind1, ind2 in zip(*indices):
    corr1, corr2 = corrs[ind1], corrs[ind2]
    p_val = corr_significance_test(size, corr1=corr1, corr2=corr2)
    if p_val > corrected_pval:
        print(models[ind1], models[ind2], p_val)


simcse exp48 0.022933097110932055


In [5]:
# Objects subset: pairwise tests between the models' 'context' correlations plus
# one hard-coded value labelled 'exp48'. Pairs whose p-value is BELOW the
# corrected threshold (i.e. significantly different) are printed.
n = 160
size = n * (n - 1) / 2  # number of distinct pairs among n items
models = ['simcse', 'mcse', 'bert', 'visualbert']

corrs = [brain_results[name]['context']['objects'] for name in models]
corrs.append(0.169)  # hard-coded value compared under the label 'exp48'
models = [*models, 'exp48']

# Positions above the diagonal enumerate each unordered pair exactly once
indices = np.triu_indices(n=len(models), m=len(models), k=1)

# Bonferroni correction: split the 0.05 level across all pairwise tests
corrected_pval = 0.05 / len(indices[0])

for ind1, ind2 in zip(*indices):
    corr1, corr2 = corrs[ind1], corrs[ind2]
    p_val = corr_significance_test(size, corr1=corr1, corr2=corr2)
    if p_val < corrected_pval:
        print(models[ind1], models[ind2], p_val)


simcse exp48 0.0006392325582973424
mcse exp48 8.668658901811455e-09
bert exp48 1.22432984976939e-05
visualbert exp48 2.603060824668546e-07


In [7]:
# Within-model comparison: for each model (plus the hard-coded 'exp48' entry),
# test whether its 'events' correlation differs from its 'objects' correlation.
# Entries whose p-value is ABOVE the corrected threshold (i.e. no significant
# difference) are printed.
n = 160
size = n * (n - 1) / 2  # number of distinct pairs among n items
models = ['simcse', 'mcse', 'bert', 'visualbert']

# The trailing literals are the hard-coded values compared under the label 'exp48'
corr_events = [brain_results[model]['context']['events'] for model in models] + [0.236]
corr_objects = [brain_results[model]['context']['objects'] for model in models] + [0.169]
models = models + ['exp48']

# Bonferroni correction: one test per entry in `models`
corrected_pval = 0.05 / len(models)

for model, corr1, corr2 in zip(models, corr_events, corr_objects):
    p_val = corr_significance_test(size, corr1=corr1, corr2=corr2)
    if p_val > corrected_pval:
        print(model, p_val)


## Full Set

In [8]:
# Full set: pairwise tests between the models' 'context' correlations on 'set2'
# plus one hard-coded value labelled 'exp48'. Pairs whose p-value is ABOVE the
# corrected threshold (i.e. not significantly different) are printed.
n = 320
size = n * (n - 1) / 2  # number of distinct pairs among n items
models = ['simcse', 'mcse', 'clap', 'bert', 'visualbert']

corrs = [brain_results[name]['context']['set2'] for name in models]
corrs.append(0.273)  # hard-coded value compared under the label 'exp48'
models = [*models, 'exp48']

# Positions above the diagonal enumerate each unordered pair exactly once
indices = np.triu_indices(n=len(models), m=len(models), k=1)

# Bonferroni correction: split the 0.05 level across all pairwise tests
corrected_pval = 0.05 / len(indices[0])

for ind1, ind2 in zip(*indices):
    corr1, corr2 = corrs[ind1], corrs[ind2]
    p_val = corr_significance_test(size, corr1=corr1, corr2=corr2)
    if p_val > corrected_pval:
        print(models[ind1], models[ind2], p_val)


In [37]:
# Full set, 'visual context' condition: pairwise tests between the models'
# correlations on 'set2'. Pairs whose p-value is ABOVE the corrected threshold
# (i.e. not significantly different) are printed.
n = 320
size = n * (n - 1) / 2  # number of distinct pairs among n items
models = ['simcse', 'mcse', 'bert', 'visualbert']

# Positions above the diagonal enumerate each unordered pair exactly once
indices = np.triu_indices(n=len(models), m=len(models), k=1)

# Bonferroni correction over the pairwise tests plus one additional comparison.
# The previous `len(indices[0]+1)` added 1 to every element of the index array
# and then took its length, so the "+1" had no effect on the divisor.
# TODO confirm which extra comparison the "+1" accounts for.
corrected_pval = 0.05 / (len(indices[0]) + 1)

for ind1, ind2 in zip(*indices):
    corr1 = brain_results[models[ind1]]['visual context']['set2']
    corr2 = brain_results[models[ind2]]['visual context']['set2']
    p_val = corr_significance_test(size, corr1=corr1, corr2=corr2)
    if p_val > corrected_pval:
        print(models[ind1], models[ind2], p_val)


In [4]:
# Load the stored results that the next cell indexes as
# exp48_results[model]['context']['set2'] to read one correlation value.
# Presumably RSA scores against the 'exp48' reference, judging by the file name.
exp48_results = open_json(ROOT / 'results/rsa/rsa_exp48_set2_top3.json')

In [5]:
# Full set, exp48 results: pairwise tests between the models' 'context'
# correlations on 'set2'. Pairs whose p-value is ABOVE the corrected threshold
# (i.e. not significantly different) are printed.
n = 320
size = n * (n - 1) / 2  # number of distinct pairs among n items
models = ['simcse', 'mcse', 'clap', 'bert', 'visualbert']

# Positions above the diagonal enumerate each unordered pair exactly once
indices = np.triu_indices(n=len(models), m=len(models), k=1)

# Bonferroni correction: split the 0.05 level across all pairwise tests
corrected_pval = 0.05 / len(indices[0])

for ind1, ind2 in zip(*indices):
    corr1, corr2 = (
        exp48_results[models[ind]]['context']['set2'] for ind in (ind1, ind2)
    )
    p_val = corr_significance_test(size, corr1=corr1, corr2=corr2)
    if p_val > corrected_pval:
        print(models[ind1], models[ind2], p_val)
