When the L2 penalty is high, the regression weights are sometimes driven very close to 0. This leads to essentially constant predictions,
which results in a Pearson r value of NaN. To prevent this, we run a separate regression in which the L2 penalty is capped
at a smaller value.

In this script, we replace the NaN Pearson r voxels with the Pearson r values generated by the L2-capped models.


In [3]:
import numpy as np

In [24]:
def replace_nan_pearsonr(uncapped_models, capped_models, resultsPath):
    '''
    Fill the NaN pearson_r entries of each uncapped model with the values
    stored at the same indices in its capped counterpart.

    uncapped_models and capped_models are paired by position. For every pair,
    both .npz files are loaded from resultsPath. The capped model (cm) must not
    contain any NaN in its 'pearson_r' array; if that check passes, every NaN
    in the uncapped model's (um) 'pearson_r' array is replaced by the cm value
    at the same index. All arrays of um (with the patched 'pearson_r') are then
    written to a new file next to the original, named '<um stem>_mod.npz'.
    The original um file is left untouched.

    Parameters:
        uncapped_models: file names of the .npz results that may contain NaN.
        capped_models: file names of the matching L2-capped .npz results.
        resultsPath: directory holding both sets of files; a trailing path
            separator is optional.

    Raises:
        ValueError: if the two lists differ in length, or if a capped model's
            'pearson_r' contains NaN.
        KeyError: if either file has no 'pearson_r' array.
    '''
    # Imported locally so the function does not depend on a file-level import.
    import os

    uncapped_models = list(uncapped_models)
    capped_models = list(capped_models)
    # zip() would silently drop the unmatched tail; fail before writing anything.
    if len(uncapped_models) != len(capped_models):
        raise ValueError(
            f"Expected one capped model per uncapped model, got "
            f"{len(uncapped_models)} uncapped and {len(capped_models)} capped"
        )

    for um, cm in zip(uncapped_models, capped_models):
        # os.path.join works whether or not resultsPath ends with a separator.
        um_path = os.path.join(resultsPath, um)
        cm_path = os.path.join(resultsPath, cm)

        # NpzFile keeps the archive open; read what we need inside a context
        # manager so the file handles are released right away.
        with np.load(um_path) as um_data:
            um_dict = {key: um_data[key] for key in um_data}
        with np.load(cm_path) as cm_data:
            cm_pearsonr = cm_data['pearson_r']

        # The capped model is the source of replacement values, so it must be
        # NaN-free itself.
        if np.isnan(cm_pearsonr).any():
            raise ValueError(f"NaN values found in capped model {cm}")

        # Patch in place: um_pearsonr is the same array object held in um_dict.
        um_pearsonr = um_dict['pearson_r']
        nan_indices = np.isnan(um_pearsonr)
        um_pearsonr[nan_indices] = cm_pearsonr[nan_indices]

        # Only touch the file extension; a plain str.replace would also rewrite
        # any '.npz' occurring earlier in the path (e.g. in a directory name).
        root, ext = os.path.splitext(um_path)
        if ext == '.npz':
            um_mod_path = f"{root}_mod.npz"
        else:
            um_mod_path = f"{um_path}_mod.npz"
        np.savez(um_mod_path, **um_dict)

In [31]:
# Pereira results: each entry of nan_models is paired, by position, with the
# L2-capped result file listed at the same index in capped_models.
resultsPath_pereira = "/data/LLMs/brainscore/results_pereira/"

nan_models = [
    'pereira_trained-var-par_384_pos+WN_1_384.npz',
    'pereira_trained-var-par_243_pos+WN_1_243.npz',
    'pereira_trained-var-par_384_pos_1_384.npz',
    'pereira_trained-var-par_243_pos_1_243.npz',
]

capped_models = [
    'pereira_positional_WN_smooth_layer_1.2_1_L2-capped_384.npz',
    'pereira_positional_WN_smooth_layer_0.5_1_L2-capped_243.npz',
    'pereira_position_layer_1.2_1_L2-capped_384.npz',
    'pereira_position_layer_0.5_1_L2-capped_243.npz',
]

# Patch the NaN pearson_r entries of every listed Pereira model.
replace_nan_pearsonr(
    uncapped_models=nan_models,
    capped_models=capped_models,
    resultsPath=resultsPath_pereira,
)

In [30]:
# Fedorenko results: a single uncapped model and its L2-capped counterpart.
resultsPath_fedorenko = "/data/LLMs/brainscore/results_fedorenko/"

nan_models = [
    'fedorenko_trained-var-par_WP_1.npz',
]
capped_models = [
    'fedorenko_pos_layer_4.7_1_L2-capped.npz',
]

# Patch the NaN pearson_r entries of the listed Fedorenko model.
replace_nan_pearsonr(
    uncapped_models=nan_models,
    capped_models=capped_models,
    resultsPath=resultsPath_fedorenko,
)