In [44]:
import numpy as np

def mean_center_data(X, y):
    """
    Mean centers the features (X) and target (y) data.

    Parameters:
    X (numpy.ndarray): The feature data.
    y (numpy.ndarray): The target data.

    Returns:
    numpy.ndarray, numpy.ndarray: Mean-centered feature and target data.
    """
    X_mean = X.mean(axis=0)
    y_mean = y.mean(axis=0) if y.ndim > 1 else y.mean()

    X_centered = X - X_mean
    y_centered = y - y_mean

    return X_centered, y_centered

In [45]:
import torch

def preprocess_data(X, y, center='none', scale='none', output=None, fit_intercept=False):
    stats = {f'{var}_{stat}': None for stat in ['mean','std','offset','scale'] for var in ['X', 'y']}

    def parse_preprocessing_args(*args):
        parsed_args = []
        for arg in args:
            if arg is None or len(arg) == 0:
                parsed_args.append('none')
            elif isinstance(arg, list):
                parsed_args.append(''.join(arg))
            else:
                parsed_args.append(arg)
        return tuple(parsed_args)

    center, scale, output = parse_preprocessing_args(center, scale, output)
    
    if fit_intercept:
        center += 'x'

    if 'x' in center.lower():
        stats['X_mean'] = X.mean(dim = 0)
    if 'y' in center.lower():
        stats['y_mean'] = y.mean(dim = 0) if y.ndim > 1 else y.mean()
        
    if 'x' in scale.lower():
        stats['X_std'] = X.std(dim=0, correction=1)
        stats['X_std'][stats['X_std'] == 0.0] = 1.0  
    if 'y' in scale.lower():
        stats['y_std'] = y.std(dim=0, correction=1)
        stats['y_std'][stats['y_std'] == 0.0] = 1.0 
    
    if 'x' in center.lower():
        X -= stats['X_mean']
    if 'y' in center.lower():
        y -= stats['y_mean']
        
    if 'x' in scale.lower():
        X /= stats['X_std']
    if 'y' in scale.lower():
        y /= stats['y_std']

    if output == 'mean_std':
        if stats['X_mean'] is None:
            stats['X_mean'] = X.mean(dim=0)
        if stats['y_mean'] is None:
            stats['y_mean'] = y.mean(dim = 0) if y.ndim > 1 else y.mean()
        if stats['X_std'] is None:
            stats['X_std'] = torch.ones(X.shape[1], dtype=X.dtype,  device=X.device)
        if stats['y_std'] is None:
            stats['y_std'] = torch.ones(y.shape[1], dtype=y.dtype,  device=y.device)

    if output == 'offset_scale':
        stats['X_offset'] = stats.pop('X_mean', None)
        stats['y_offset'] = stats.pop('y_mean', None)
        stats['X_scale'] = stats.pop('X_std', None)
        if stats['X_offset'] is None:
            stats['X_offset'] = torch.zeros(X.shape[1], dtype=X.dtype, device=X.device)
        if stats['y_offset'] is None:
            stats['y_offset'] = torch.zeros(y.shape[1], dtype=y.dtype, device=y.device)
        if stats['X_scale'] is None:
            stats['X_scale'] = torch.ones(X.shape[1], dtype=X.dtype,  device=X.device)

    if output == 'offset_scale':
        return X, y, stats['X_offset'], stats['y_offset'], stats['X_scale']

    if output == 'mean_std':
        return X, y, stats['X_mean'], stats['y_mean'], stats['X_std'], stats['y_std']
    
    return X, y

# Example usage
# X, y = ... # your data here
# X_preprocessed, y_preprocessed = preprocess_data(X, y)


In [22]:
import sys
import xarray as xr
import numpy as np
import torch
import os
import random 
from tqdm import tqdm 
import pickle 
import warnings
warnings.filterwarnings('ignore')
import random    
random.seed(0)
import scipy.stats as st
import gc

ROOT = os.getenv('BONNER_ROOT_PATH')
sys.path.append(ROOT)
from config import CACHE, NSD_NEURAL_DATA, NSD_SAMPLE_IMAGES    

from model_evaluation.predicting_brain_data.regression.regression import regression_shared_unshared, pearson_r
from model_evaluation.predicting_brain_data.regression.torch_cv import TorchRidgeGCV
from sklearn.linear_model import Ridge


SHARED_IDS_PATH = os.path.join(ROOT, 'image_tools','nsd_ids_shared')
SHARED_IDS = pickle.load(open(SHARED_IDS_PATH, 'rb'))
SHARED_IDS = [image_id.strip('.png') for image_id in SHARED_IDS]

SAMPLE_IDS = pickle.load(open(NSD_SAMPLE_IMAGES, 'rb'))
SAMPLE_IDS = [image_id.strip('.png') for image_id in SAMPLE_IDS]

ALPHA_RANGE = [10**i for i in range(10)]
    
    
def normalize(X):
    X = (X - X.mean(axis=0)) / X.std(axis=0)
    X = np.nan_to_num(X)
    return X


            
            
def load_nsd_data(mode: str, subject: int, region: str, return_data:bool=True) -> torch.Tensor:
        
        """
        
        Loads the neural data from disk for a particular subject and region.


        Parameters
        ----------
        mode:
            The type of neural data to load ('shared' or 'unshared')
            
        subject:
            The subject number 
        
        region:
            The region name
            
        return_ids: 
            Whether the image ids are returned 
        

        Returns
        -------
        A Tensor of Neural data, or Tensor of Neural data and stimulus ids
        
        """
        path = os.path.join(NSD_NEURAL_DATA,f'roi={region}/preprocessed/z_score=session.average_across_reps=True/subject={subject}.nc')
        
        var_name = f'allen2021.natural_scenes.preprocessing=fithrf_GLMdenoise_RR.roi={region}.z_score=session.average_across_reps=True.subject={subject}'

        
        ds = xr.open_dataset(path, engine='h5netcdf')

        if mode == 'unshared':
            data = ds.where(~ds.presentation.stimulus_id.isin(SHARED_IDS),drop=True)

        elif mode == 'shared':
            data = ds.where(ds.presentation.stimulus_id.isin(SHARED_IDS),drop=True)
                        
        ids = list(data.presentation.stimulus_id.values)
            
        if return_data:
            return ids, data, var_name
        
        else: 
            return ids
        
        
            
def filter_activations(data: xr.DataArray, ids: list) -> torch.Tensor:
            
        """
    
        Filters model activations using image ids.


        Parameters
        ----------
        data:
            Model activation data
            
        ids:
            image ids
        

        Returns
        -------
        A Tensor of model activations filtered by image ids
        
        """
        
        data = data.set_index({'presentation':'stimulus_id'})
        activations = data.sel(presentation=ids)
        activations = activations.sortby('presentation', ascending=True)

        return activations.values
    
    
def normalize(X):
    X = (X - X.mean(axis=0)) / X.std(axis=0)
    X = np.nan_to_num(X)
    return X

In [46]:
region='V1'
subject = 0
device = 'cpu'
activations_identifier= f'expansion_30_dataset=naturalscenes_subject={subject}'


#load X_train and y_train
X_train = xr.open_dataset(os.path.join(CACHE,'activations', activations_identifier), 
                            engine='netcdf4').x.values 
_ , neural_data_train, var_name_train = load_nsd_data(mode = 'unshared', subject = subject, region = region)
y_train = neural_data_train[var_name_train].values

X_train, y_train = mean_center_data(X_train, y_train)

# corss validated ridge regression on training data to find optimal penalty term
regression = TorchRidgeGCV(
    alphas=ALPHA_RANGE,
    fit_intercept=True,
    scale_X=False,
    scoring='pearsonr',
    store_cv_values=False,
    alpha_per_target=False,
    device=device)

#regression.to('cpu')
regression.fit(X_train, y_train)
best_alpha = float(regression.alpha_)
print('best alpha:',best_alpha)
print('best score:',regression.score_)

best alpha: 1.0
best score: tensor(0.4173)


In [32]:
#load X_test and y_test
X_test = xr.open_dataset(os.path.join(CACHE,'activations',f'expansion_30_dataset=naturalscenes_shared_images')).x.values#.astype(np.float16)

In [52]:
_ , neural_data_test, var_name_test = load_nsd_data(mode ='shared',
                                                    subject = subject,
                                                    region = region)           
y_test = neural_data_test[var_name_test].values #.astype(np.float16)   

In [54]:
neural_data_test

In [34]:
# preprocessing_kwargs = {'output': 'offset_scale'}
# preprocessing_kwargs['center'] = 'x'
# X_train, y_train, X_offset, y_offset, X_scale = preprocess_data(torch.Tensor(X_train), torch.Tensor(y_train), **preprocessing_kwargs)
# X_test, y_test, X_offset, y_offset, X_scale = preprocess_data(torch.Tensor(X_test), torch.Tensor(y_test), **preprocessing_kwargs)

In [35]:
import numpy as np

def min_max_scale(X, feature_range=(0, 1)):
    """
    Scales the features of X to a specified range.

    Parameters:
    X (numpy.ndarray): The data to be scaled.
    feature_range (tuple): The desired range of transformed data.

    Returns:
    numpy.ndarray: Scaled data.
    """
    X_min, X_max = X.min(axis=0), X.max(axis=0)
    scale = feature_range[1] - feature_range[0]
    min_range = feature_range[0]

    # Avoid division by zero
    X_std = (X - X_min) / (X_max - X_min)
    X_scaled = X_std * scale + min_range

    return X_scaled

In [38]:
X_test = min_max_scale(X_test)
y_test = min_max_scale(y_test)

In [26]:
#X_train, y_train = mean_center_data(X_train, y_train)
#X_test, y_test = mean_center_data(X_test, y_test)

In [39]:
#model= Ridge(alpha=1000000)
#model.fit(X_train, y_train)
y_predicted = regression.predict(X_test)
r = pearson_r(torch.Tensor(y_test),torch.Tensor(y_predicted))
r.mean()

tensor(nan)

In [41]:
y_predicted

tensor([[nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        ...,
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan]])

In [22]:
l.append(r.mean())