In [None]:
!wget -nc https://osf.io/8qcdp/download -O ObjectOrientationData.mat
!wget -nc https://osf.io/49qeb/download -O InanimateObjectsData.mat
!wget -nc https://osf.io/x9dz4/download -O InanimateObjects.zip
!unzip InanimateObjects.zip
!mkdir -p Stimuli 
!mv InanimateObjects ./Stimuli/InanimateObjects
!wget -c https://raw.githubusercontent.com/harvard-visionlab/sroh/main/2022/feature_extractor.py

File ‘ObjectOrientationData.mat’ already there; not retrieving.
File ‘InanimateObjectsData.mat’ already there; not retrieving.
File ‘InanimateObjects.zip’ already there; not retrieving.
Archive:  InanimateObjects.zip
replace InanimateObjects/spoon.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: mv: cannot move 'InanimateObjects' to './Stimuli/InanimateObjects/InanimateObjects': Directory not empty
--2022-07-28 01:02:22--  https://raw.githubusercontent.com/harvard-visionlab/sroh/main/2022/feature_extractor.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 416 Range Not Satisfiable

    The file is already fully retrieved; nothing to do.



# Getting Brain Data

In [None]:
import os
import scipy.io as sio
import numpy as np
import torch


def load_brain_data(dataset, brain_regions=['EarlyV', 'pOTC', 'aOTC']):
    """Load RDMs, betas, reliability and image names from '<dataset>Data.mat'.

    The file is read from the current working directory.

    dataset: 'InanimateObjects' or 'ObjectOrientation' (asserted).
    brain_regions: field names to pull out of the 'rdms', 'betas' and
        'reliability' structs stored in the .mat file.

    Returns (rdms, betas, reliability, image_names): three dicts keyed by
    region name, plus the list of whitespace-stripped image names.
    """
    assert dataset in ['InanimateObjects', 'ObjectOrientation']
    mat = sio.loadmat(f'{dataset}Data.mat', struct_as_record=False, squeeze_me=True)

    def _per_region(field):
        # With struct_as_record=False each struct is an object whose fields
        # are reachable through its __dict__.
        struct_fields = mat[field].__dict__
        return {region: struct_fields[region] for region in brain_regions}

    rdms = _per_region('rdms')
    betas = _per_region('betas')
    reliability = _per_region('reliability')
    image_names = [name.strip() for name in mat['image_names']]
    return rdms, betas, reliability, image_names

# RidgeCV

In [None]:
import numpy as np
from fastprogress.fastprogress import progress_bar
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.metrics import r2_score
from pdb import set_trace
from copy import deepcopy
from tqdm import tqdm
import sklearn

# Candidate ridge penalties: six small decades plus 50 log-spaced values from 1e1 to 1e5.
default_alphas = np.concatenate([np.array([1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1.0]), np.logspace(1, 5, 50)])

def leave_one_out_ridge(X, y, alphas=default_alphas, fit_intercept=True, normalize=True, mb=None):
    '''
        Construct predicted brain patterns by training on N-1 items, 
        and then predicting the held out item.
        
        X: model responses [numItems x numFeatures]
        y: brain responses [numItems x numVoxels]
        alphas: candidate penalties handed to RidgeCV
        fit_intercept: forwarded to RidgeCV
        normalize: if True, z-score the features with a StandardScaler that is
            fit on the training items of each fold only
        mb: unused; kept so existing calls keep working

        Returns a dict with:
            n_items, n_features, n_voxels: problem sizes
            ALPHAS: selected penalty per fold (stacked)
            COEF_M: coefficient matrix averaged over folds [numVoxels x numFeatures]
            INTERCEPT: intercepts per fold (stacked)
            R2: per-voxel r2_score of held-out predictions (multioutput='raw_values')
            y_pred: held-out predictions, same shape as y

        Raises ValueError when fewer than two items are given (nothing to train on).
    '''
    n_items, n_features = X.shape
    n_voxels = y.shape[1]
    if n_items < 2:
        raise ValueError(f"leave-one-out needs at least 2 items, got {n_items}")
    y_pred = np.zeros(y.shape)

    ALPHAS = []
    COEF_M = np.zeros((n_voxels, n_features))
    INTERCEPT = []
    for test_idx in progress_bar(range(n_items), total=n_items):
        # boolean masks: every item trains except the single held-out one
        train_idxs = np.ones(n_items, dtype=bool)
        train_idxs[test_idx] = False
        test_idxs = ~train_idxs

        clf = RidgeCV(alphas=alphas, fit_intercept=fit_intercept)

        if normalize:
            # fit the scaler on training items only so the held-out item
            # does not leak into the normalization statistics
            scaler = StandardScaler()
            scaler.fit(X[train_idxs])
            X_train = scaler.transform(X[train_idxs])
            X_test = scaler.transform(X[test_idxs])
        else:
            X_train = X[train_idxs]
            X_test = X[test_idxs]

        clf.fit(X_train, y[train_idxs])
        y_pred[test_idxs] = clf.predict(X_test)

        ALPHAS.append(clf.alpha_)
        COEF_M += clf.coef_
        INTERCEPT.append(clf.intercept_)

    ALPHAS = np.stack(ALPHAS)
    # Average over the number of folds. The previous code divided by the last
    # loop index (n_items - 1), which over-scaled the mean by n/(n-1).
    COEF_M /= n_items
    INTERCEPT = np.stack(INTERCEPT)
    R2 = r2_score(y, y_pred, multioutput='raw_values')

    return {
        "n_items": n_items,
        "n_features": n_features,
        "n_voxels": n_voxels,
        "ALPHAS": ALPHAS,
        "COEF_M": COEF_M,
        "INTERCEPT": INTERCEPT,
        "R2": R2,
        "y_pred": y_pred
    }

In [None]:
import torch
from torchvision import models, transforms 
from PIL import Image 
from natsort import natsorted 
from glob import glob 
from pathlib import Path 
from feature_extractor import FeatureExtractor

def prepare_images(dataset='InanimateObjects', mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
    """Load every ./Stimuli/<dataset>/*.jpg as a normalized image tensor.

    dataset: name of the sub-folder of ./Stimuli to read.
    mean, std: per-channel statistics passed to transforms.Normalize
        (the defaults are the standard ImageNet values).

    Returns one stacked tensor with the images in natural-sorted filename order.
    NOTE(review): callers pair these rows with brain data by position — confirm
    this order matches `image_names` from the .mat file.

    Raises FileNotFoundError when no .jpg file matches.
    """
    def _open_rgb(path):
        # Force 3 channels: a grayscale or RGBA file would otherwise reach
        # Normalize with a channel count that does not match mean/std.
        with Image.open(path) as img:
            return img.convert('RGB')

    transform = transforms.Compose([
        _open_rgb,                                   # use PIL to open the image
        transforms.Resize(224),                      # resize shortest edge to 224 pixels
        transforms.CenterCrop(224),                  # center crop if not square
        transforms.ToTensor(),                       # HxWxC image -> CxHxW torch tensor
        transforms.Normalize(mean=mean, std=std),    # normalize by the given stats
    ])

    pattern = f'./Stimuli/{dataset}/*.jpg'
    files = natsorted(glob(pattern))
    if not files:
        # torch.stack([]) would fail with a much less helpful message
        raise FileNotFoundError(f"no images found matching {pattern}")

    return torch.stack([transform(f) for f in files])

def fit_encoding_model(betas, layer_name, model_name,
                       dataset='InanimateObjects', mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
    """Fit a leave-one-out ridge encoding model from one network layer to brain data.

    betas: brain responses, passed as `y` to leave_one_out_ridge
        ([numItems x numVoxels]).
    layer_name: layer whose activations are extracted via FeatureExtractor.
    model_name: key into torchvision `models.__dict__`, e.g. 'alexnet'.
    dataset, mean, std: forwarded to prepare_images.

    Returns (pred_rdms, feat_rdms, results):
        pred_rdms: {layer: 1 - corrcoef of the predicted brain patterns}
        feat_rdms: {layer: 1 - corrcoef of the flattened layer activations}
        results:   the leave_one_out_ridge dict of the last layer processed

    Raises ValueError when the extractor returns no activations.
    """
    print("==> prepare images")
    imgs = prepare_images(dataset=dataset, mean=mean, std=std)

    print("==> load pretrained model")
    # `pretrained=True` triggers the torchvision deprecation warning visible in
    # this notebook's output; kept as-is so the loaded weights do not change.
    model = models.__dict__[model_name](pretrained=True)
    print("==> extract activation map for the given layer")
    pred_rdms = {}
    feat_rdms = {}
    results = None
    model.eval()   # <-- very important, freeze normalization stats, no dropout etc.
    # no_grad: only forward activations are needed, so skip autograd bookkeeping
    with torch.no_grad(), FeatureExtractor(model, [layer_name]) as extractor:
        features = extractor(imgs)
        # `extracted_name` avoids overwriting the `layer_name` argument
        for extracted_name, feat in features.items():
            # flatten each item's activation map into a 1D feature vector and
            # hand NumPy/sklearn a plain array rather than a tensor
            X = torch.flatten(feat, 1).detach().cpu().numpy()
            feat_rdms[extracted_name] = 1 - np.corrcoef(X)

            print(f"==> fitting ridge regression model ({extracted_name}) (numFeatures={X.shape[1]})")
            results = leave_one_out_ridge(X, betas, fit_intercept=True, normalize=False)

            # compute the predicted neural RDM
            pred_rdms[extracted_name] = 1 - np.corrcoef(results['y_pred'])

    if results is None:
        # previously this surfaced as an UnboundLocalError at the return
        raise ValueError(f"no activations were extracted for layer {layer_name!r}")

    return pred_rdms, feat_rdms, results

# Getting Significant Features

Lesion Highest

In [None]:
import pickle
def get_weights_of_vox(coef_m, vox_num):
  """Return the absolute values of row `vox_num` of `coef_m` (one voxel's weights)."""
  voxel_row = coef_m[vox_num, :]
  return np.absolute(voxel_row)

def get_percentage_best_features(max_vox_coef_m, decimal_percentage):
  """Return the indices of the highest-valued fraction of a 1-D weight vector.

  max_vox_coef_m: 1-D array of (absolute) weights for one voxel.
  decimal_percentage: fraction to keep; values above 1 are read as whole
    percentages (25 -> 0.25).

  Returns an index array of length int(size * fraction), ordered from lowest
  to highest weight. It is empty when that count rounds down to 0.
  """
  #changes to decimal percentage
  if decimal_percentage > 1:
    decimal_percentage /= 100
  n_keep = int(max_vox_coef_m.size * decimal_percentage)
  ranked = np.argsort(max_vox_coef_m)
  # Slice from an explicit start index: ranked[-0:] is the WHOLE array, so the
  # old `[-n_keep:]` returned every feature when n_keep was 0.
  start = max(0, ranked.size - n_keep)
  return ranked[start:]

def get_feature_counts_for_most_consistent_across_voxels(coef_m, num_voxels, num_features, percentage):
  """Count, per feature, in how many voxels it ranks among the top-weighted ones.

  coef_m: weight matrix indexed as [voxel, feature].
  num_voxels: number of rows of coef_m to visit (0 .. num_voxels-1).
  num_features: length of the returned count vector.
  percentage: share of top features taken per voxel, passed on to
    get_percentage_best_features.

  Returns a float array of length num_features holding the counts.
  """
  tallies = np.zeros(num_features)
  for voxel in range(num_voxels):
    # absolute weights of this voxel, then the indices of its strongest features
    abs_weights = get_weights_of_vox(coef_m, voxel)
    top_idxs = get_percentage_best_features(abs_weights, percentage)
    # one increment for every listed feature index
    np.add.at(tallies, top_idxs, 1)
  return tallies

def get_most_common_features_across_voxels(feature_counts, percentage):
  """Select the most frequently counted features.

  feature_counts: 1-D sequence of per-feature counts.
  percentage: fraction (0..1) of features to keep.

  Returns (feature_1_hot, best_features):
    feature_1_hot: float vector, 1 at selected features and 0 elsewhere
      (usable as a mask).
    best_features: indices of the int(len * percentage) highest counts,
      ordered from lowest to highest count. Empty when that number rounds
      down to 0.
  """
  n_features = len(feature_counts)
  #takes percent and turns into a number of features to keep
  n_keep = int(n_features * percentage)
  ranked = np.argsort(feature_counts)
  # Explicit start index: the old `[-n_keep:]` became `[-0:]` for n_keep == 0
  # and selected every feature instead of none.
  best_features = ranked[max(0, n_features - n_keep):]
  #1D vector of size of features - use for future masking
  feature_1_hot = np.zeros(n_features)
  # argsort indices are unique, so a plain assignment yields the 0/1 mask
  feature_1_hot[best_features] = 1
  return feature_1_hot, best_features

#saving dicts
def dict_save(dict, file_name):
    """Pickle `dict` to '<file_name>.pickle' using the highest pickle protocol.

    Note: the '.pickle' suffix is appended here, whereas dict_load expects the
    full path including the suffix.
    """
    target_path = file_name + '.pickle'
    with open(target_path, 'wb') as handle:
        pickle.dump(dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

def dict_load(dict_name):
    """Unpickle and return the object stored at path `dict_name`.

    `dict_name` is used as given (no '.pickle' suffix is appended).
    Only load files you trust: unpickling can execute arbitrary code.
    """
    with open(dict_name, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

# A Priori

In [None]:
#Defining model and getting layer names
def get_layers(model, parent_name='', layer_info=None):
    """Collect the dotted names of all leaf modules under `model`.

    model: object exposing `named_children()` (e.g. a torch.nn.Module).
    parent_name: dotted prefix of `model` within the full network.
    layer_info: list to append to; a new list is created when omitted.

    Returns the list of leaf names in traversal order, e.g. 'features.0'.
    """
    # A literal [] default is created once and shared by every call, so
    # repeated calls without `layer_info` used to accumulate names.
    if layer_info is None:
        layer_info = []
    for module_name, module in model.named_children():
        layer_name = parent_name + '.' + module_name
        if len(list(module.named_children())):
            # container module: recurse, appending into the same list
            get_layers(module, layer_name, layer_info=layer_info)
        else:
            # leaf module: drop the leading '.' left by the empty root prefix
            layer_info.append(layer_name.strip('.'))

    return layer_info
def get_layer_names(model):
    """Return the dotted leaf-module names of `model`, collected into a new list."""
    fresh_names = []
    return get_layers(model, '', fresh_names)


# Load torchvision's pretrained AlexNet and list the dotted names of its leaf
# modules (original label of this cell: "DROPOUT MODEL").
# Presumably these are the names to pass as `layer_name` further down — verify
# against FeatureExtractor.
model = models.alexnet(pretrained=True)
model_name = 'alexnet'

layer_names = get_layer_names(model)
# bare expression: displayed as the cell output
layer_names

In [None]:
import random
# Subject indices into betas[brain_region].
subs = [0,1,2,3,4,5,6,7,8,9]
#subs = random.sample(subs, 2) 
# Per voxel, this share of the highest-|weight| features gets counted.
percentage_common = .25

model_name = 'alexnet'

rdms, betas, reliability, image_names = load_brain_data('InanimateObjects')
brain_region = 'aOTC'
#layer_name = 'classifier.1'
# NOTE(review): the layer lists used elsewhere in this notebook for alexnet are
# of the form 'features.N' / 'classifier.N'; 'fc6.0' looks like a name from a
# different model definition — confirm it is valid for model_name above.
layer_name = 'fc6.0'


# Fit one encoding model per subject and sum the per-feature counts across subjects.
for sub in subs:

  # transposed before being used as `y` ([numItems x numVoxels]) in the ridge fit
  sub_betas = betas[brain_region][sub].transpose()
  pred_rdms, feat_rdms, results = fit_encoding_model(sub_betas, 
                                                   model_name=model_name, 
                                                   layer_name=layer_name,
                                                   dataset='InanimateObjects', 
                                                   mean=[0.485, 0.456, 0.406], 
                                                   std=[0.229, 0.224, 0.225])

  num_voxels = results['n_voxels']
  num_features = results['n_features']
  coef_m = results['COEF_M']
  feature_counts = get_feature_counts_for_most_consistent_across_voxels(coef_m, num_voxels,num_features, percentage_common)
  # The accumulator is (re)created on the first subject; both branches then add
  # this subject's counts.
  if sub == subs[0]:
    all_feature_counts = np.zeros(num_features)
    all_feature_counts = all_feature_counts + feature_counts
  else:
    all_feature_counts = all_feature_counts + feature_counts

In [None]:
# From here on model_name is only a label (it is used in output file names below).
model_name = 'pretrained_alexnet'
# Maps a fraction (0.01 .. 0.99, rounded to 2 decimals) to the indices of the
# features with the highest summed counts at that fraction.
aOTC_classifier1_feature_idxs = {}
percentages = np.arange(.01 , 1, .01)
for percentage in percentages:
  # feature_1_hot (the mask) is not stored; only the index array is kept
  feature_1_hot, feature_idxs = get_most_common_features_across_voxels(all_feature_counts, percentage)
  # round() gives clean float keys such as 0.05 for later lookups
  aOTC_classifier1_feature_idxs[round(percentage,2)] = feature_idxs



In [None]:
# NOTE(review): pOTC_classifier1_feature_idxs is not defined in any cell visible
# here; presumably it was built by re-running the cells above with
# brain_region = 'pOTC' — confirm, otherwise this fails on a fresh kernel.
pOTC_classifier1_feature_idxs[.05]

array([1119, 2938,  167,  771, 3527, 1518, 2533, 3725, 2862,  840,  165,
       3337, 3191, 2595, 1161, 1486, 3059, 1355,  375, 2043,  943,  371,
       1678, 2427, 2842, 3036,  185, 1371, 2834,   90,  521, 3076, 2843,
       2946, 3809, 3476, 2306, 2056, 2997, 1877, 1910, 1212, 1190, 3655,
       2498, 1331, 1032,  646, 2552, 1478, 3029, 1847, 1258,  225,  584,
       3815,  327, 1756, 3595, 3267, 4046, 1638, 3797, 1677, 1853,  587,
       2310, 3745, 3784,  525,  743,  423, 3115, 1460, 3619, 3587, 1498,
       2372,  209, 2488, 3118, 2725, 3143,  981, 1338, 1983, 2368, 2124,
        468, 2035, 2539, 1056, 1178, 1441, 1138, 2959,  785, 3386,  912,
       1392,  194, 2375, 3896, 1207,  447, 1341, 1060, 1136, 2008, 2903,
       2434, 2716, 2525,  893, 2176, 1811, 2786, 2653,  603, 2857,  192,
       2072, 1009, 2135, 3930, 3881, 1349, 2308, 1962, 3508, 1661, 2768,
       2132, 2154, 3463, 1703, 3848, 3159, 3201,  825,    1, 3092, 2537,
        244, 3903, 3362, 2057,   81,  839, 1374, 24

In [None]:
# Feature indices selected for aOTC at the 0.05 fraction (top 5% by summed count).
aOTC_classifier1_feature_idxs[.05]

array([2135,  209, 2981, 2573, 2539, 3429, 3526,  210, 3589, 1392, 3900,
       1527, 2255, 2409, 1592, 3550, 2348, 3549,   81, 1338, 2857, 3318,
        646, 3380, 1908,  738,  609, 3191, 3171, 2713, 1910, 3076, 3328,
       2595,  775,  203, 1821, 2001, 2919, 3527, 1646, 2866, 2503, 2816,
       3247, 3576, 3618, 3788, 1756, 2577, 3554, 2290, 3636,  242, 1356,
       2635, 1877, 1677, 1207, 3059, 1609, 2392, 2041, 1349, 3931, 3777,
       1790, 3797, 3657, 2537, 1890, 1587, 3415,  636, 1258, 3712, 3409,
        587, 1460, 2768, 2176, 3036,   43, 2115, 1188, 3595,  327,  447,
       3329, 3126, 2487, 1962,  805,  166, 2660,  350, 1971,  298, 3000,
       3929, 1498,  893, 3801, 3815,  468, 2903, 2605, 1178, 3172, 1346,
        743,  414, 1757,  603, 2820, 2480, 3749, 2552, 1486, 3362, 3448,
       1190, 3881,  868, 2015, 1992, 3115, 2725, 1362, 2154,  512, 3372,
       1089,  375, 2368, 1032, 1009, 2843, 3619, 2072, 1538, 2434, 3201,
        207, 2253, 1847, 1138, 1854, 2008, 3463, 30

In [None]:
# Number of features selected at the 0.05 fraction for aOTC ...
print(len(aOTC_classifier1_feature_idxs[.05]))

# ... and how many of them are also in the pOTC selection at the same fraction.
# NOTE(review): pOTC_classifier1_feature_idxs is not created in any visible cell.
y = []
for i in aOTC_classifier1_feature_idxs[.05]:
  if i in pOTC_classifier1_feature_idxs[.05]:
    y.append(i)

# bare expression: the overlap size is shown as the cell output
len(y)

204


126

In [None]:
from google.colab import files
# Save the aOTC feature-index dict and trigger a browser download (Colab).
#model_name = 'pretrained_alexnet'
# NOTE(review): the file is tagged "alexnetgn_1x", while the visible cells fit
# model_name = 'alexnet' — confirm the tag matches the model that produced
# aOTC_classifier1_feature_idxs.
model_name = "alexnetgn_1x"
brain_region = 'aOTC'
# dict_save appends '.pickle', so the download path below adds it explicitly.
# layer_name is used as-is here (dots included).
dict_save(aOTC_classifier1_feature_idxs, brain_region+"_"+model_name+"_"+layer_name+"_feature_idxs")
files.download(brain_region+"_"+model_name+"_"+layer_name+"_feature_idxs.pickle")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Recursively

In [None]:
import random
from google.colab import files

# Subject indices into betas[brain_region].
subs = [0,1,2,3,4,5,6,7,8,9]
# Per voxel, this share of the highest-|weight| features gets counted.
percentage_common = .25

brain_region = 'aOTC'

###for alexnet pretrained###
# Alternative layer sets (uncomment one to use it instead):
#layer_names =  ['avgpool','classifier.0','classifier.1','classifier.2','classifier.3','classifier.4','classifier.5','classifier.6']
#layer_names = ['features.0','features.1','features.2','features.3','features.4','features.5','features.6']
#layer_names = ['features.6','features.7','features.8','features.9','features.10','features.11','features.12']
# Was 'featuers.6' (typo), which does not match the 'features.N' layer names.
layer_names = ['features.6']

# model_name is the torchvision constructor name; file_model_tag only labels
# the output files, so the two no longer overwrite each other.
model_name = 'alexnet'
file_model_tag = 'pretrained_alexnet'

rdms, betas, reliability, image_names = load_brain_data('InanimateObjects')

# Draw the subject subset once, into its own name. The old code reassigned
# `subs` inside the layer loop, permanently shrinking the subject list after
# the first layer; every layer still used the same two subjects, as here.
# The draw is unseeded: call random.seed(...) first for a reproducible subset.
sampled_subs = random.sample(subs, 2)

for layer_name in layer_names:
  # per-layer accumulator of feature counts summed over the sampled subjects
  all_feature_counts = None

  for sub in sampled_subs:
    # transposed before being used as `y` ([numItems x numVoxels]) in the ridge fit
    sub_betas = betas[brain_region][sub].transpose()
    pred_rdms, feat_rdms, results = fit_encoding_model(sub_betas,
                                                   model_name=model_name,
                                                   layer_name=layer_name,
                                                   dataset='InanimateObjects',
                                                   mean=[0.485, 0.456, 0.406],
                                                   std=[0.229, 0.224, 0.225])

    num_voxels = results['n_voxels']
    num_features = results['n_features']
    coef_m = results['COEF_M']
    feature_counts = get_feature_counts_for_most_consistent_across_voxels(coef_m, num_voxels,num_features, percentage_common)
    if all_feature_counts is None:
      all_feature_counts = np.zeros(num_features)
    all_feature_counts = all_feature_counts + feature_counts

  # fraction (rounded to 2 decimals) -> indices of the most commonly counted features
  layer_name_feature_idxs = {}
  percentages = np.arange(.01 , 1, .01)
  for percentage in percentages:
    feature_1_hot, feature_idxs = get_most_common_features_across_voxels(all_feature_counts, percentage)
    layer_name_feature_idxs[round(percentage,2)] = feature_idxs

  # file names use the layer name without dots, e.g. 'features6'
  layer_tag = layer_name.replace(".", "")
  out_name = brain_region+"_"+file_model_tag+"_"+layer_tag+"_feature_idxs"
  dict_save(layer_name_feature_idxs, out_name)
  files.download(out_name+".pickle")

==> prepare images
==> load pretrained model


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


==> extract activation map for the given layer
==> fitting ridge regression model (features.0) (numFeatures=193600)


==> prepare images
==> load pretrained model


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


==> extract activation map for the given layer
==> fitting ridge regression model (features.0) (numFeatures=193600)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

==> prepare images
==> load pretrained model


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


==> extract activation map for the given layer
==> fitting ridge regression model (features.1) (numFeatures=193600)


==> prepare images
==> load pretrained model


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


==> extract activation map for the given layer
==> fitting ridge regression model (features.1) (numFeatures=193600)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

==> prepare images
==> load pretrained model


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


==> extract activation map for the given layer
==> fitting ridge regression model (features.2) (numFeatures=46656)


==> prepare images
==> load pretrained model


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


==> extract activation map for the given layer
==> fitting ridge regression model (features.2) (numFeatures=46656)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

==> prepare images
==> load pretrained model


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


==> extract activation map for the given layer
==> fitting ridge regression model (features.3) (numFeatures=139968)


==> prepare images
==> load pretrained model


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


==> extract activation map for the given layer
==> fitting ridge regression model (features.3) (numFeatures=139968)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

==> prepare images
==> load pretrained model


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


==> extract activation map for the given layer
==> fitting ridge regression model (features.4) (numFeatures=139968)


==> prepare images
==> load pretrained model


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


==> extract activation map for the given layer
==> fitting ridge regression model (features.4) (numFeatures=139968)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

==> prepare images
==> load pretrained model


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


==> extract activation map for the given layer
==> fitting ridge regression model (features.5) (numFeatures=32448)


==> prepare images
==> load pretrained model


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


==> extract activation map for the given layer
==> fitting ridge regression model (features.5) (numFeatures=32448)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

==> prepare images
==> load pretrained model


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


==> extract activation map for the given layer
==> fitting ridge regression model (features.6) (numFeatures=64896)


==> prepare images
==> load pretrained model


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


==> extract activation map for the given layer
==> fitting ridge regression model (features.6) (numFeatures=64896)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Lesion Random

In [None]:
# Check the number of features of one layer: fit a single subject (index 1) so
# that results['n_features'] reports how many units `layer_name` has.
model_name = 'alexnet' 
rdms, betas, reliability, image_names = load_brain_data('InanimateObjects')
brain_region = 'EarlyV'
layer_name = 'features.11'

# transposed before being used as `y` ([numItems x numVoxels]) in the ridge fit
sub_betas = betas[brain_region][1].transpose()
pred_rdms, feat_rdms, results = fit_encoding_model(sub_betas, 
                                                   model_name=model_name, 
                                                   layer_name=layer_name,
                                                   dataset='InanimateObjects', 
                                                   mean=[0.485, 0.456, 0.406], 
                                                   std=[0.229, 0.224, 0.225])


In [None]:
import random
def get_random_feature_units(num_features, percentage, seed=None):
  """Draw a random subset of feature indices without replacement.

  num_features: indices are drawn from 0 .. num_features-1.
  percentage: fraction (0..1) of the indices to draw; int(num_features *
    percentage) of them are returned.
  seed: optional seed for a reproducible draw. When omitted, the module-level
    `random` generator is used, exactly as before.

  Returns a list of the drawn indices (NumPy integers) in random order.
  """
  features = list(np.arange(0,num_features))
  percentile = int(len(features)*percentage)
  # a private generator keeps seeded draws independent of global RNG state
  sampler = random if seed is None else random.Random(seed)
  return sampler.sample(features, percentile)

In [None]:
# fraction (rounded to 2 decimals) -> list of randomly drawn feature indices
layer_name_random_feature_idxs = {}
# Use the unit count reported by the fit for `layer_name` instead of a
# hard-coded 4096: for layer_name = 'features.11' the layer has 43264 units,
# so 4096 would only ever sample from the first 4096 of them.
num_features = results['n_features']
percentages = np.arange(.01 , 1, .01)

for percentage in percentages:
  random_feature_idxs = get_random_feature_units(num_features, percentage)
  layer_name_random_feature_idxs[round(percentage,2)] = random_feature_idxs

In [None]:
# Save the random index sets under the layer name with dots removed and trigger
# a browser download. dict_save appends '.pickle', hence the explicit suffix below.
# `files` is google.colab.files, imported in an earlier cell.
dict_save(layer_name_random_feature_idxs, layer_name.replace(".","")+"_random_feature_idxs")
files.download(layer_name.replace(".","")+"_random_feature_idxs.pickle")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Recursively: the same random-index generation, repeated over many layers.
# `layers` and `num_features_list` are parallel lists: num_features_list[i] is
# the number of flattened features of layers[i].
layers = ['classifier.0', 'classifier.1', 'classifier.2', 'classifier.4', 'features.0', 'features.1', 'features.2', 'features.3', 'features.4', 'features.6', 'features.7', 'features.9', 'features.10', 'features.11']
num_features_list = [9216, 4096, 4096, 4096, 193600, 193600, 46656, 139968, 139968, 64896, 64896, 43264, 43264, 43264]


In [None]:
#recursively
import random
from google.colab import files

# For every layer, draw random feature-index sets at each fraction 0.01 .. 0.99
# and save/download them as '<layer without dots>_random_feature_idxs.pickle'.
# The draws are unseeded, so each run produces different index sets.
for i in range(len(layers)):
  # file names use the layer name without dots, e.g. 'features11'
  layer_name = layers[i].replace(".", "")
  num_features = num_features_list[i]

  # fraction (rounded to 2 decimals) -> list of randomly drawn feature indices
  layer_name_random_feature_idxs = {}
  percentages = np.arange(.01 , 1, .01)

  for percentage in percentages:
    random_feature_idxs = get_random_feature_units(num_features, percentage)
    layer_name_random_feature_idxs[round(percentage,2)] = random_feature_idxs

  # dict_save appends '.pickle', hence the explicit suffix for the download
  dict_save(layer_name_random_feature_idxs, layer_name+"_random_feature_idxs")
  files.download(layer_name+"_random_feature_idxs.pickle")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>