In [1]:


import sys
import xarray as xr
import numpy as np
import torch
import os
import random 
from tqdm import tqdm 
import pickle 
import warnings
warnings.filterwarnings('ignore')
import random    
random.seed(0)
import scipy.stats as st


ROOT = os.getenv('BONNER_ROOT_PATH')
sys.path.append(ROOT)
from config import CACHE, MAJAJ_DATA 
from model_evaluation.predicting_brain_data.regression.regression import regression_shared_unshared, pearson_r, regression_cv
from model_evaluation.predicting_brain_data.regression.regression_cv_mod import RidgeCVMod
from sklearn.linear_model import Ridge

# Dataset identifier and the subject names used with it.
DATASET = "majajhong"
SUBJECTS = ["Chabo", "Tito"]
# Powers of ten from 10^1 to 10^9 (presumably ridge regularization strengths -- confirm against usage).
ALPHA_RANGE = [10 ** exponent for exponent in range(1, 10)]

In [27]:
# Open the cached activations file, order it by stimulus id, and convert the
# 'x' variable into a torch tensor used as the regression input.
activations_identifier = 'alexnet_conv5_5_layers_256_features_majajhong'
activations_path = os.path.join(CACHE, 'activations', activations_identifier)
activations_data = xr.open_dataset(activations_path, engine='netcdf4').sortby(
    'stimulus_id', ascending=True
)
print(activations_data.stimulus_id.values)
X = torch.Tensor(activations_data.x.values)

['0015b49a190e9bce70b108b28dc1a0674d3c9e66'
 '00487445258f44c4de1c0a681a21e618c9e148f8'
 '0088124f2856fcb7adc5c47a6b5d426f58d7b96a' ...
 'ffdf4f231454c92cdb2af94414885fa5137a2974'
 'ffe49726e966a565466b749cf66ef26c21227033'
 'ffefd7bb06be3b40589a3487d14991ca925dc618']


In [15]:
# Re-open the cached activations and load the pickled list of training stimulus ids.
activations_identifier = 'alexnet_conv5_5_layers_256_features_majajhong'
activations_data = xr.open_dataset(os.path.join(CACHE,'activations',activations_identifier), engine='netcdf4')

train_ids_path = os.path.join(ROOT,'model_evaluation/predicting_brain_data/benchmarks','majaj_train_ids')
# Use a context manager so the file handle is closed once the ids are read.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open(train_ids_path, "rb") as train_ids_file:
    TRAIN_IDS = pickle.load(train_ids_file)

# Index the 'presentation' dimension by stimulus_id so rows can be selected by id.
activations_data = activations_data.set_index({'presentation':'stimulus_id'})


In [16]:
# Display the activations dataset for inspection.
activations_data

In [18]:
# Keep only the presentations whose index label is listed in TRAIN_IDS.
# Requires 'presentation' to already be indexed by stimulus id.
activations_data = activations_data.sel(presentation=TRAIN_IDS)
#activations_data = activations_data.sortby('stimulus_id', ascending=True)

In [21]:
# NOTE(review): sortby returns a new object; the sorted result is only displayed
# here and is not assigned back to activations_data.
activations_data.sortby('presentation', ascending=True)

In [28]:
# NOTE(review): this overrides the MAJAJ_DATA value imported from config with a
# machine-specific absolute path -- confirm which one is intended.
MAJAJ_DATA = '/data/atlas/neural_data/majajhong'

# Name the subject/region explicitly instead of baking them into a
# placeholder-free f-string.
subject, region = 'Tito', 'IT'
file_name = f'SUBJECT_{subject}_REGION_{region}'
file_path = os.path.join(MAJAJ_DATA,file_name)

neural_data = xr.open_dataset(file_path, engine='netcdf4')
print(neural_data.stimulus_id.values)
# Drop singleton dimensions from 'x' and convert to a torch tensor used as the regression target.
y = torch.Tensor(neural_data['x'].values.squeeze())

['0015b49a190e9bce70b108b28dc1a0674d3c9e66'
 '00487445258f44c4de1c0a681a21e618c9e148f8'
 '0088124f2856fcb7adc5c47a6b5d426f58d7b96a' ...
 'ffdf4f231454c92cdb2af94414885fa5137a2974'
 'ffe49726e966a565466b749cf66ef26c21227033'
 'ffefd7bb06be3b40589a3487d14991ca925dc618']


In [29]:
# Run regression_cv with a fixed-penalty ridge model; it returns paired
# sequences of true and predicted values.
y_true, y_predicted = regression_cv(x=X, y=y, model=Ridge(alpha=1))

# Correlate each (true, predicted) pair, then average across the pairs.
pairwise_r = [
    pearson_r(true_part, predicted_part)
    for true_part, predicted_part in zip(y_true, y_predicted)
]
r = torch.stack(pairwise_r).mean(dim=0)

                                                                                                          

In [30]:
# Average r over its remaining entries (r is already the mean across the zipped pairs).
# NOTE(review): the recorded output is 0.0094, i.e. near zero -- worth confirming
# that the rows of X and y are aligned on the same stimulus ids.
r.mean()

tensor(0.0094)

In [None]:
# NOTE(review): `subject`, `region`, `ds` and `pbar` are not defined in the
# cells visible here -- presumably this was lifted from a loop; confirm before running.
# Wrap r in a Dataset whose entries are all labelled with the current subject and region.
ds_tmp = xr.Dataset(
    data_vars={'r_value': (['r_values'], r)},
    coords={
        'subject': (['r_values'], [subject] * len(r)),
        'region': (['r_values'], [region] * len(r)),
    },
)

# Append the new entries to the running dataset and advance the progress bar.
ds = xr.concat([ds, ds_tmp], dim='r_values')
pbar.update(1)

NSD

In [2]:
from sklearn.linear_model import Ridge

import sys
import xarray as xr
import numpy as np
import torch
import os
import random 
from tqdm import tqdm 
import pickle 
import warnings
warnings.filterwarnings('ignore')
import random    
random.seed(0)
import scipy.stats as st

ROOT = os.getenv('BONNER_ROOT_PATH')
sys.path.append(ROOT)
from config import CACHE, NSD_NEURAL_DATA      

SHARED_IDS_PATH = os.path.join(ROOT, 'image_tools','nsd_ids_shared')
# Use a context manager so the file handle is closed once the ids are read.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open(SHARED_IDS_PATH, 'rb') as shared_ids_file:
    SHARED_IDS = pickle.load(shared_ids_file)

# Remove a trailing '.png' extension only. str.strip('.png') would instead strip
# any of the characters '.', 'p', 'n', 'g' from BOTH ends of the id.
SHARED_IDS = [
    image_id[:-len('.png')] if image_id.endswith('.png') else image_id
    for image_id in SHARED_IDS
]

# Powers of ten from 10^0 to 10^8; a narrower range(3,7) was used previously.
ALPHA_RANGE = [10**i for i in range(9)]
    

In [3]:
def load_nsd_data(mode: str, subject: int, region: str) -> "tuple[list, xr.Dataset, str]":
    """
    Loads the neural data from disk for a particular subject and region.

    Parameters
    ----------
    mode:
        Which presentations to keep: 'shared' keeps those whose stimulus_id is
        in SHARED_IDS, 'unshared' keeps all the others.

    subject:
        The subject number (used to build the file name).

    region:
        The region name (used to build the directory and variable name).

    Returns
    -------
    ids:
        List of the stimulus ids of the retained presentations.

    data:
        The filtered xarray Dataset.

    var_name:
        A name built from region and subject (presumably the data variable
        holding the responses inside `data` -- confirm against the file).

    Raises
    ------
    ValueError
        If mode is neither 'shared' nor 'unshared'.
    """
    # Fail fast on an unknown mode instead of hitting an unbound `data` later.
    if mode not in ('shared', 'unshared'):
        raise ValueError(f"mode must be 'shared' or 'unshared', got {mode!r}")

    path = os.path.join(NSD_NEURAL_DATA,f'roi={region}/preprocessed/z_score=session.average_across_reps=True/subject={subject}.nc')
    var_name = f'allen2021.natural_scenes.preprocessing=fithrf_GLMdenoise_RR.roi={region}.z_score=session.average_across_reps=True.subject={subject}'

    ds = xr.open_dataset(path, engine='h5netcdf')

    # Boolean mask over presentations: True where the stimulus is a shared image.
    is_shared = ds.presentation.stimulus_id.isin(SHARED_IDS)
    keep = is_shared if mode == 'shared' else ~is_shared
    data = ds.where(keep, drop=True)

    ids = list(data.presentation.stimulus_id.values)

    return ids, data, var_name
        
        
            
def filter_activations(data: "xr.DataArray", ids: list) -> "np.ndarray":
    """
    Filters model activations using image ids.

    Parameters
    ----------
    data:
        Model activation data with a 'presentation' dimension and a
        'stimulus_id' coordinate.

    ids:
        Image ids to keep.

    Returns
    -------
    The `.values` of the selected activations (a NumPy array for an xarray
    input, not a torch Tensor), ordered by ascending stimulus id rather than
    by the order of `ids`.
    """
    # Index 'presentation' by stimulus id so rows can be selected by label.
    indexed = data.set_index({'presentation':'stimulus_id'})
    selected = indexed.sel(presentation=ids)

    # Sort by stimulus id so the output order does not depend on the order of `ids`.
    return selected.sortby('presentation', ascending=True).values


In [4]:
# Open the cached activations array and print its stimulus ids.
activations_identifier = 'alexnet_conv5_5_layers_256_features_naturalscenes'
activations_path = os.path.join(CACHE, 'activations', activations_identifier)
activations_data = xr.open_dataarray(activations_path, engine='netcdf4')
print(activations_data.stimulus_id.values)

['image00000' 'image00001' 'image00002' ... 'image72997' 'image72998'
 'image72999']
