In [1]:
import numpy as np
from trained_untrained_results_funcs import find_best_layer, elementwise_max, custom_add_2d, load_perf, loop_through_datasets
from untrained_results_funcs import load_untrained_data
from plotting_functions import plot_across_subjects, load_into_3d, save_nii, plot_2d_hist_scatter_updated
from matplotlib import pyplot as plt
from stats_funcs import compute_paired_ttest
import pandas as pd
import seaborn as sns
from nilearn import plotting
import matplotlib


# ---------------------------------------------------------------------------
# Notebook-wide configuration. Everything below is plain module-level state;
# most of it is not used inside this cell and is presumably consumed by later
# cells.
# ---------------------------------------------------------------------------

# Root directory of the stored results (string ends with a trailing slash).
resultsPath_base = '/data/LLMs/brainscore/'
# Relative figure directory.
# NOTE(review): 'figusure4' looks like a typo for 'figure4' -- confirm the
# directory name on disk before changing this runtime path.
figurePath = 'figures/new_figures/figusure4/'

# Empty containers filled by the loop over Pereira experiments ('243', '384')
# further down in this cell, keyed by experiment id.
br_labels_dict = {}
num_vox_dict = {}
subjects_dict = {}

# Per-dataset folders holding the preprocessed .npy files loaded below.
# (The f-prefix is redundant here: these strings contain no placeholders.)
data_processed_folder_pereira = f'/data/LLMs/data_processed/pereira/dataset'
data_processed_folder_fed = f'/data/LLMs/data_processed/fedorenko/dataset'
data_processed_folder_blank = f'/data/LLMs/data_processed/blank/dataset'


# Accumulator with three parallel, initially empty lists; not filled in this cell.
omega_metric = {'feature_extraction': [], 'dataset': [], 'values': []}

# Plotting / analysis options. None of these are read in this cell.
plot_legend = False
palette = ['gray', 'blue', 'black']
perf_str=''
plot_xlabel = False
remove_y_axis = False
num_seeds = 5
# Suffix strings; presumably appended to model names to select a
# feature-extraction variant -- TODO confirm against the cells that use them.
feature_extraction_arr = ['', '-mp', '-sp']
perf_arr = ['out_of_sample_r2']
shuffled_arr = ['']
shuffled = ''
# Dataset identifiers; the same three names key the *_dict_all lookups below.
dataset_arr = ['pereira', 'fedorenko', 'blank']

# Load, for each Pereira experiment id, the per-voxel network labels and the
# subjects array, and record how many entries the label array has.
# allow_pickle=True lets np.load unpickle object arrays, which can execute
# arbitrary code -- only acceptable because these are trusted local files.
# Note: `e` and `bre` stay defined at notebook scope after the loop.
for e in ['243', '384']:

    bre = np.load(f'{data_processed_folder_pereira}/networks_{e}.npy', allow_pickle=True)
    br_labels_dict[e] = bre
    num_vox_dict[e] = bre.shape[0]
    subjects_dict[e] = np.load(f"{data_processed_folder_pereira}/subjects_{e}.npy", allow_pickle=True)
    
# Indices of the entries labelled 'language' in each experiment.
# np.argwhere returns one row per match; squeeze() drops the length-1 axes.
# NOTE(review): assuming the label arrays are 1-D, a single match would
# collapse to a 0-d array here -- confirm this cannot happen.
lang_indices_384 = np.argwhere(br_labels_dict['384'] == 'language').squeeze()
lang_indices_243 = np.argwhere(br_labels_dict['243'] == 'language').squeeze()
# Same indices, keyed by experiment id.
lang_indices_dict = {}
lang_indices_dict['384'] = lang_indices_384
lang_indices_dict['243'] = lang_indices_243

# Subject arrays for each dataset (trusted local files; see allow_pickle caveat
# in the NumPy documentation).
subjects_arr_fed  = np.load(f"{data_processed_folder_fed}/subjects.npy", allow_pickle=True)
subjects_arr_blank  = np.load(f"{data_processed_folder_blank}/subjects.npy", allow_pickle=True)
subjects_arr_pereira = np.load(f"{data_processed_folder_pereira}/subjects_complete.npy", allow_pickle=True)
# Dataset name -> subjects. The structure is mixed: the 'pereira' entry is the
# per-experiment dict ({'243': ..., '384': ...}), the other two are flat arrays.
subjects_dict_all = {'pereira': subjects_dict, 'fedorenko': subjects_arr_fed, 'blank': subjects_arr_blank}

# Dataset name -> network labels, with the same mixed structure. For fedorenko
# and blank every entry is labelled 'language' (one label per subjects entry).
networks_dict_all = {'pereira': br_labels_dict, 'fedorenko': np.repeat('language', len(subjects_arr_fed)), 
                      'blank': np.repeat('language', len(subjects_arr_blank))}

# Shape of the combined Pereira matrix: 627 rows (= 243 + 384, matching the
# row split used when se_intercept_pereira_full is filled below) by one column
# per entry of subjects_arr_pereira.
shape_pereira_full = (627, int(subjects_arr_pereira.shape[0]))

# Column indices (into the combined Pereira matrix) that are populated for each
# experiment; used below to scatter per-experiment values into the full array.
non_nan_indices_243 = np.load(f"{data_processed_folder_pereira}/non_nan_indices_243.npy") # voxels which are in 243
non_nan_indices_384 = np.load(f"{data_processed_folder_pereira}/non_nan_indices_384.npy") # voxels which are in 384
non_nan_indices_dict = {'384': non_nan_indices_384, '243': non_nan_indices_243}

# Results root used for the loads below. Holds the same value as
# resultsPath_base defined at the top of this cell.
resultsPath = '/data/LLMs/brainscore/'

# Per-experiment intercept errors for Pereira. The files are named
# 'mse_intercept_*' while the variables are prefixed 'se_'.
# NOTE(review): confirm whether these hold squared errors or mean squared errors.
se_intercept_243 = np.load(f'{resultsPath}results_pereira/mse_intercept_243.npy')
se_intercept_384 = np.load(f'{resultsPath}results_pereira/mse_intercept_384.npy')
# Combined Pereira array, NaN everywhere except where an experiment has data:
# the first 243 rows take experiment 243, the remaining rows take experiment
# 384, each written only into that experiment's non-NaN columns.
se_intercept_pereira_full = np.full(shape_pereira_full, fill_value=np.nan)
se_intercept_pereira_full[:243, non_nan_indices_243] = se_intercept_243
se_intercept_pereira_full[243:, non_nan_indices_384] = se_intercept_384

# Fedorenko and Blank each have a single intercept-error file.
se_intercept_fed = np.load(f'{resultsPath}results_fedorenko/mse_intercept.npy')

se_intercept_blank = np.load(f'{resultsPath}results_blank/mse_intercept.npy')


# Dataset name -> intercept-error array.
se_intercept_dict = {'pereira': se_intercept_pereira_full, 'fedorenko': se_intercept_fed, 
                     'blank': se_intercept_blank}

# Remaining notebook-level options/accumulators; not read in this cell.
save_best_layer = []
clip_zero = False 
median = False

In [2]:
def load_model_data_into_df(model_names, plot_names, dataset_list, exp_list, 
                            subjects_dict, networks_dict, perf='pearson_r',
                            results_dir='/data/LLMs/brainscore/'):
    """Collect stored per-model scores into one long-format DataFrame.

    The first four arguments are parallel sequences that are zipped together;
    each position describes one results file to load from
    ``<results_dir>/results_<dataset>/<dataset>_<model_name>.npz``.

    Parameters
    ----------
    model_names : sequence of str
        Model identifiers as they appear in the results file names.
    plot_names : sequence
        Display label written to the ``Model`` column for each entry.
    dataset_list : sequence of str
        Dataset name for each entry; used in the file path and as the key
        into ``subjects_dict`` / ``networks_dict``.
    exp_list : sequence of str
        Experiment key for each entry. An empty string means the dataset's
        entry in ``subjects_dict`` / ``networks_dict`` is used directly;
        otherwise that entry is indexed with ``exp`` first.
    subjects_dict, networks_dict : mapping
        Dataset name -> labels (or dataset name -> {exp -> labels}).
    perf : str, default 'pearson_r'
        Name of the array to read from each ``.npz`` file.
    results_dir : str, default '/data/LLMs/brainscore/'
        Root directory containing the ``results_<dataset>`` folders.

    Returns
    -------
    pandas.DataFrame
        Columns ``perf``, ``subjects``, ``Network``, ``Model`` and ``Exp``,
        with one row per element of each loaded score array.

    Raises
    ------
    ValueError
        If the subjects or network labels for an entry do not have the same
        length as its loaded score array.
    """
    import os  # function-scope import keeps this definition self-contained

    results = {'perf': [], 'subjects': [], 'Network': [],
               'Model': [], 'Exp': []}

    for mn, pn, dataset, exp in zip(model_names, plot_names, dataset_list, exp_list):

        results_file = os.path.join(results_dir, f"results_{dataset}", f"{dataset}_{mn}.npz")
        # np.load on an .npz returns a lazy NpzFile that keeps the file open;
        # the context manager closes it once the requested array is read.
        with np.load(results_file) as npz_file:
            model_res = npz_file[perf]

        subjects = subjects_dict[dataset]
        networks = networks_dict[dataset]
        if len(exp) > 0:
            # NOTE(review): the lookup uses `exp` verbatim while the 'Exp'
            # column stores exp.strip('_') -- confirm the keys of the dicts
            # passed in carry the same underscores as exp_list.
            subjects = subjects[exp]
            networks = networks[exp]

        num_rows = len(model_res)
        # Fail early with the offending file instead of letting pandas raise
        # a generic "arrays must be the same length" error at the very end.
        if len(subjects) != num_rows or len(networks) != num_rows:
            raise ValueError(
                f"Length mismatch for {results_file} (exp={exp!r}): "
                f"{num_rows} '{perf}' values, {len(subjects)} subjects, "
                f"{len(networks)} network labels"
            )

        results['perf'].extend(model_res)
        results['subjects'].extend(subjects)
        results['Network'].extend(networks)
        results['Model'].extend(np.repeat(pn, num_rows))
        results['Exp'].extend(np.repeat(exp.strip('_'), num_rows))

    return pd.DataFrame(results)