In [112]:
import os

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from scipy.stats import wilcoxon

In [73]:
# Model identifiers iterated over later in the notebook; each entry is passed
# as the `llm_name` argument of compute_frac_sig.
model_names = [
    'Llama',
    'rwkv',
    'roberta-large',
    'gpt2xl',
]

In [2]:
# Toy paired data for 5 participants: one score before and one score after
# the intervention, in matching order.
condition_pre = np.array([0.6, 0.7, 0.8, 0.5, 0.9])
condition_post = np.array([0.7, 0.8, 0.85, 0.6, 0.95])

# One-sided Wilcoxon signed-rank test on the paired differences (pre - post);
# alternative='less' asks whether those differences tend to be negative.
stat, p_value = wilcoxon(x=condition_pre, y=condition_post, alternative='less')

# Report the test statistic and p-value, one item per line.
report_lines = [
    "Wilcoxon Signed-Rank Test:",
    f"Statistic: {stat}",
    f"P-value: {p_value}",
]
print("\n".join(report_lines))

Wilcoxon Signed-Rank Test:
Statistic: 0.0
P-value: 0.03125


In [3]:
# Figure 1: paired Wilcoxon signed-rank comparison of GPT2-XL against OASM for
# every dataset / shuffle setting / noL2 file variant / feature-extraction suffix.
#
# NOTE: the stored p-value is ALWAYS from the one-sided test with alternative
# "GPT2-XL > OASM" (alternative='greater', GPT2-XL passed first). The
# "<model> better" prefix only reports which model has the higher mean `perf`.
# An entry such as "OASM better: 1.0" therefore means "no evidence that GPT2-XL
# exceeds OASM", not "p-value for OASM being better".
store_pvalues = {}
for dataset in ['pereira', 'blank', 'fedorenko']:
    for shuffle_str in ['shuffled', 'contig']:
        for noL2_str in ['_noL2custom', '']:
            results = pd.read_csv(f'/home2/ebrahim/beyond-brainscore/analyze_results/figures_code/figures_data/figure1/{dataset}_pearson_r{noL2_str}_{shuffle_str}.csv')
            # The OASM rows do not depend on `fe`, so select them once per file
            # instead of once per feature-extraction suffix.
            results_oasm = results.loc[results.Model == 'OASM', 'perf'].to_numpy()
            for fe in ['', '-mp', '-sp']:
                results_gpt2xl = results.loc[results.Model == f'GPT2-XL{fe}', 'perf'].to_numpy()
                # Paired test: assumes both selections list the same units in
                # the same row order -- TODO confirm against the CSV layout.
                result = wilcoxon(results_gpt2xl, results_oasm, alternative='greater')
                better = 'OASM' if np.mean(results_oasm) > np.mean(results_gpt2xl) else 'GPT2XL'
                store_pvalues[f"{dataset}_{shuffle_str}{noL2_str}_{fe}"] = f"{better} better: {result.pvalue}"
store_pvalues

  temp = _wilcoxon_iv(x, y, zero_method, correction, alternative, method, axis)


{'pereira_shuffled_noL2custom_': 'OASM better: 1.0',
 'pereira_shuffled_noL2custom_-mp': 'OASM better: 1.0',
 'pereira_shuffled_noL2custom_-sp': 'OASM better: 1.0',
 'pereira_shuffled_': 'OASM better: 1.0',
 'pereira_shuffled_-mp': 'OASM better: 1.0',
 'pereira_shuffled_-sp': 'OASM better: 0.9990234375',
 'pereira_contig_noL2custom_': 'GPT2XL better: 0.0009765625',
 'pereira_contig_noL2custom_-mp': 'GPT2XL better: 0.0009765625',
 'pereira_contig_noL2custom_-sp': 'GPT2XL better: 0.0009765625',
 'pereira_contig_': 'GPT2XL better: 0.0009765625',
 'pereira_contig_-mp': 'GPT2XL better: 0.0009765625',
 'pereira_contig_-sp': 'GPT2XL better: 0.0009765625',
 'blank_shuffled_noL2custom_': 'OASM better: 1.0',
 'blank_shuffled_noL2custom_-mp': 'OASM better: 1.0',
 'blank_shuffled_noL2custom_-sp': 'OASM better: 1.0',
 'blank_shuffled_': 'OASM better: 1.0',
 'blank_shuffled_-mp': 'OASM better: 1.0',
 'blank_shuffled_-sp': 'OASM better: 1.0',
 'blank_contig_noL2custom_': 'GPT2XL better: 0.03394457743

In [5]:
# Figure 4: two-sided paired Wilcoxon signed-rank test of GPT2XL (one row set
# per feature-extraction suffix) against the 'Simple' model, per dataset.
# The "<model> better" prefix reports which model has the higher mean `perf`;
# the number after it is the two-sided p-value.
store_pvalues = {}
for dataset in ['pereira', 'blank', 'fedorenko']:
    # The file and the 'Simple' rows are the same for every suffix, so load
    # and select them once per dataset rather than once per `fe`.
    results = pd.read_csv(f'/home2/ebrahim/beyond-brainscore/analyze_results/figures_code/figures_data/figure4/{dataset}_pearson_r.csv')
    results_simple = results.loc[results.Model == 'Simple', 'perf'].to_numpy()
    for fe in ['-lt', '-mp', '-sp']:
        results_gpt2xl = results.loc[results.Model == f'GPT2XL{fe}', 'perf'].to_numpy()
        # Paired test: assumes both selections list the same units in the
        # same row order -- TODO confirm against the CSV layout.
        result = wilcoxon(results_gpt2xl, results_simple)

        better = 'Simple' if np.mean(results_simple) > np.mean(results_gpt2xl) else 'GPT2XL'
        store_pvalues[f"{dataset}_{fe}"] = f"{better} better: {result.pvalue}"

store_pvalues
    

{'pereira_-lt': 'GPT2XL better: 0.005859375',
 'pereira_-mp': 'GPT2XL better: 0.10546875',
 'pereira_-sp': 'GPT2XL better: 0.02734375',
 'blank_-lt': 'Simple better: 0.0625',
 'blank_-mp': 'Simple better: 0.0625',
 'blank_-sp': 'Simple better: 0.0625',
 'fedorenko_-lt': 'GPT2XL better: 1.0',
 'fedorenko_-mp': 'GPT2XL better: 1.0',
 'fedorenko_-sp': 'GPT2XL better: 0.625'}

In [6]:
# Figure 5: two-sided paired Wilcoxon signed-rank test of the GPT2XLU rows (one
# row set per feature-extraction suffix) against the 'Simple' model, per dataset.
# The "<model> better" prefix reports which model has the higher mean `perf`;
# the number after it is the two-sided p-value.
store_pvalues = {}
for dataset in ['pereira', 'blank', 'fedorenko']:
    # The file and the 'Simple' rows are the same for every suffix, so load
    # and select them once per dataset rather than once per `fe`.
    results = pd.read_csv(f'/home2/ebrahim/beyond-brainscore/analyze_results/figures_code/figures_data/figure5/{dataset}_pearson_r.csv')
    results_simple = results.loc[results.Model == 'Simple', 'perf'].to_numpy()
    for fe in ['-lt', '-mp', '-sp']:
        results_gpt2xlu = results.loc[results.Model == f'GPT2XLU{fe}', 'perf'].to_numpy()
        # Paired test: assumes both selections list the same units in the
        # same row order -- TODO confirm against the CSV layout.
        result = wilcoxon(results_gpt2xlu, results_simple)

        # The compared model here is GPT2XLU, so label it as such (the label
        # previously read "GPT2XL", copied from the figure-4 cell).
        better = 'Simple' if np.mean(results_simple) > np.mean(results_gpt2xlu) else 'GPT2XLU'
        store_pvalues[f"{dataset}_{fe}"] = f"{better} better: {result.pvalue}"

store_pvalues

{'pereira_-lt': 'Simple better: 0.009765625',
 'pereira_-mp': 'Simple better: 0.130859375',
 'pereira_-sp': 'Simple better: 0.556640625',
 'blank_-lt': 'Simple better: 0.0625',
 'blank_-mp': 'Simple better: 0.125',
 'blank_-sp': 'Simple better: 0.0625',
 'fedorenko_-lt': 'Simple better: 0.3125',
 'fedorenko_-mp': 'Simple better: 0.4375',
 'fedorenko_-sp': 'Simple better: 0.3125'}

In [157]:
# One colour per feature-extraction suffix, in the order the suffixes are passed.
color_palette = sns.color_palette(["gray", "blue", 'black'])
def compute_frac_sig(feature_extraction_arr, dataset, figure_num, llm_name='', llm_sig_only=False):
    """Plot, per subject, the percentage of rows with p < 0.05 and save the figure.

    Reads ``figures_data/figure{figure_num}/pvalues_{dataset}_{llm_name}.csv``,
    which must provide the columns ``fe``, ``pval`` and ``subject`` (plus
    ``pval_LLM_sig`` when ``llm_sig_only`` is true). For every suffix in
    ``feature_extraction_arr`` the rows with a matching ``fe`` are selected,
    ``pval`` is thresholded at 0.05, and the per-subject mean of that flag
    (times 100) is drawn as a bar (mean over subjects) with one dot per subject.

    Parameters
    ----------
    feature_extraction_arr : sequence of str
        Values of the ``fe`` column to plot, one hue level each.
    dataset : str
        Dataset name; used in the input/output file names and as the x category.
    figure_num : int or str
        Figure number; selects both the input data folder and the output folder.
    llm_name : str, optional
        Model name used in the input/output file names.
    llm_sig_only : bool, optional
        If true, keep only rows whose ``pval_LLM_sig`` is below 0.05 before
        computing the fractions. Defaults to False (no filtering).

    Returns
    -------
    None
        The figure is written as PNG and PDF to
        ``figures/new_figures/figure{figure_num}/sig/`` and then closed.
    """
    base_dir = '/home2/ebrahim/beyond-brainscore/analyze_results/figures_code'
    df = pd.read_csv(f'{base_dir}/figures_data/figure{figure_num}/pvalues_{dataset}_{llm_name}.csv')

    fig, ax = plt.subplots(1, 1, figsize=(3, 4))
    plot_stats_df = {'values': [], 'feature_extraction_arr': [], 'dataset': []}

    for fe in feature_extraction_arr:

        df_fe = df.loc[df.fe == fe].copy()
        print(df_fe.shape, dataset)

        # Optional restriction to rows flagged by the `pval_LLM_sig` column.
        # The column is only touched when requested, so files without it
        # still work with the default settings.
        if llm_sig_only:
            df_fe = df_fe.loc[df_fe['pval_LLM_sig'] < 0.05]

        if df_fe.shape[0] == 0:
            # Nothing to aggregate for this suffix: report it and move on.
            print(f"No significant voxels/electrodes/fROIs for {llm_name}{fe} for {dataset}")
            continue

        df_fe['pval_sig'] = np.where(df_fe['pval'] < 0.05, 1, 0)

        # Mean of the 0/1 flag per subject = fraction of significant rows.
        subject_means = df_fe.groupby('subject')['pval_sig'].mean()

        plot_stats_df['values'].extend(np.array(subject_means) * 100)
        plot_stats_df['feature_extraction_arr'].extend(np.repeat(fe, len(subject_means)))
        plot_stats_df['dataset'].extend(np.repeat(dataset, len(subject_means)))

    plot_stats_df = pd.DataFrame(plot_stats_df)
    sns.barplot(plot_stats_df, hue='feature_extraction_arr', y='values', x='dataset', alpha=0.4, legend=False, errorbar=None, palette=color_palette, ax=ax)
    sns.stripplot(plot_stats_df, hue='feature_extraction_arr', y='values', x='dataset', dodge=True, size=10, alpha=0.8, legend=False, palette=color_palette, ax=ax)
    sns.despine()
    ax.set_xlabel('')
    # Removing the ticks also removes their labels.
    ax.set_xticks([])
    ax.set_yticks([0, 30])
    ax.set_yticklabels([0, 30], fontsize=20)
    ax.set_ylabel('')

    # Write next to the other outputs of the same figure number; the folder was
    # previously fixed to figure4, so runs for different figures overwrote
    # each other's files.
    out_dir = f'{base_dir}/figures/new_figures/figure{figure_num}/sig'
    os.makedirs(out_dir, exist_ok=True)
    for ext in ('png', 'pdf'):
        fig.savefig(f'{out_dir}/frac_sig_{dataset}_{llm_name}.{ext}', bbox_inches='tight')
    # Close this specific figure rather than whichever one is current.
    plt.close(fig)

In [158]:
# Earlier figure-2 calls, kept commented out for reference:
#compute_frac_sig(['-lt', '-mp', '-sp'], 'pereira',2)
#compute_frac_sig(['-lt', '-mp', '-sp'], 'fedorenko',2)
#compute_frac_sig(['-lt', '-mp', '-sp'], 'blank',2)

# Fraction-significant plots for gpt2xl from the figure-4 data, one call per
# dataset. (An unfinished `for model in` line used to sit here and made the
# whole cell a SyntaxError; the per-model loop lives in its own cell.)
compute_frac_sig(['-lt', '-mp', '-sp'], 'pereira', 4, 'gpt2xl')
compute_frac_sig(['-lt', '-mp', '-sp'], 'fedorenko', 4, 'gpt2xl')
compute_frac_sig(['-lt', '-mp', '-sp'], 'blank', 4, 'gpt2xl')

In [160]:
# Figure-4 fraction-significant plots for every model in `model_names`,
# visiting the datasets in the order pereira, fedorenko, blank.
fe_suffixes = ['-lt', '-mp', '-sp']
for model in model_names:
    print(model)
    for dataset_name in ('pereira', 'fedorenko', 'blank'):
        compute_frac_sig(fe_suffixes, dataset_name, 4, model)

gpt2xl
(13553, 8) pereira
(13553, 8) pereira
(13553, 8) pereira
(97, 8) fedorenko
(97, 8) fedorenko
(97, 8) fedorenko
(60, 8) blank
(60, 8) blank
(60, 8) blank


In [22]:
# Figure-5 fraction-significant plots with the default (empty) llm_name,
# one call per dataset in the order pereira, fedorenko, blank.
for dataset_name in ('pereira', 'fedorenko', 'blank'):
    compute_frac_sig(['-lt', '-mp', '-sp'], dataset_name, 5)

KeyError: 'pval_gpt2xl_sig'