# Extract features for object2vec stimulus set
##package "utilsCM" slightly modified from "utils" written by Eric Elmolzino

In [None]:
from torch import nn
import torch
from torchvision.models.alexnet import alexnet
from tqdm import tqdm

In [None]:
## Specify Alexnet model
class AlexNet(nn.Module):
    """Pretrained torchvision AlexNet truncated at a named stage.

    The torchvision network is cut into consecutive stages (five slices of
    ``features``, the average pool, and three slices of ``classifier``).
    ``forward`` runs the stages in order and returns the activations of the
    stage selected by ``feature_name``.

    Args:
        feature_name: One of ``AlexNet.FEATURE_NAMES``.

    Raises:
        ValueError: If ``feature_name`` is not a known stage name.
    """

    # Valid ``feature_name`` values, in the order the stages are applied.
    FEATURE_NAMES = ('conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5',
                     'pool', 'fc_1', 'fc_2', 'fc_3')

    def __init__(self, feature_name):
        super().__init__()
        # Fail fast (before the pretrained weights are loaded): an unknown
        # name used to make forward() return None, which only surfaced later
        # as an unrelated AttributeError in the calling code.
        if feature_name not in self.FEATURE_NAMES:
            raise ValueError(
                'Unknown feature_name %r; expected one of %s'
                % (feature_name, ', '.join(self.FEATURE_NAMES)))
        self.feature_name = feature_name
        base = alexnet(pretrained=True)
        self.conv_1 = base.features[:3]
        self.conv_2 = base.features[3:6]
        self.conv_3 = base.features[6:8]
        self.conv_4 = base.features[8:10]
        self.conv_5 = base.features[10:]
        self.avgpool = base.avgpool
        self.fc_1 = base.classifier[:3]
        self.fc_2 = base.classifier[3:6]
        self.fc_3 = base.classifier[6:]
        # Inference only: switch every submodule to evaluation mode.
        self.eval()

    def forward(self, stimuli):
        """Return the activations of the selected stage for a batch.

        Args:
            stimuli: Batch of inputs whose first dimension is the batch
                dimension.

        Returns:
            A 2-D tensor with one row per input. Convolutional stages are
            flattened to ``(batch, -1)``; ``pool`` and the ``fc_*`` stages
            are already 2-D at the point they are returned.

        Raises:
            ValueError: If ``self.feature_name`` is not a known stage name
                (e.g. it was reassigned after construction).
        """
        x = stimuli
        for stage in ('conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5'):
            x = getattr(self, stage)(x)
            if stage == self.feature_name:
                return x.view(x.shape[0], -1)

        x = self.avgpool(x)
        # The classifier slices operate on flattened vectors.
        x = x.view(x.shape[0], -1)
        if self.feature_name == 'pool':
            return x

        for stage in ('fc_1', 'fc_2', 'fc_3'):
            x = getattr(self, stage)(x)
            if stage == self.feature_name:
                return x

        raise ValueError(
            'Unknown feature_name %r; expected one of %s'
            % (self.feature_name, ', '.join(self.FEATURE_NAMES)))

In [3]:
from utilsCM import listdir, image_to_tensor

# specify variables
resolutionval = 227
layer = 'conv_5'
# Run on the GPU when one is available. The model and its inputs must live on
# the same device; moving only the inputs makes the forward pass fail with a
# device-mismatch error on CUDA machines.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = AlexNet(layer).to(device)
# print(model)

# specify directory where images (for which we have fMRI data) are
conditions = listdir('data-object2vec/stimuli')


print('Extracting stimuli features')
# Maps condition name (last path component) -> mean feature vector (numpy).
condition_features = {}
for c in tqdm(conditions):
    c_name = c.split('/')[-1]
    stimuli = listdir(c)
    #resize according to resolution and square the image
    stimuli = [image_to_tensor(s, resolution=resolutionval) for s in stimuli]
    stimuli = torch.stack(stimuli).to(device)
    with torch.no_grad():
        #average across the same category
        feats = model(stimuli).mean(dim=0).cpu().numpy()
    condition_features[c_name] = feats

#checking dimensionality. For convolutional layers, output is maxpool
#Example Conv1: 55 x 55 x 64
# condition_features['airplane'].shape

  0%|          | 0/81 [00:00<?, ?it/s]

Extracting stimuli features


100%|██████████| 81/81 [00:23<00:00,  3.41it/s]


In [6]:
# Load the fMRI data.
# NOTE(review): the arguments are presumably the subject number and the list
# of ROIs to load -- confirm against utilsCM.Subject.
from utilsCM import Subject

subject_args = (1, ['LOC'])
subject = Subject(*subject_args)
# feat_extractor = AlexNetFC6()

In [7]:
# Predict fMRI responses from the features (AlexNet activations).
# Author's notes: cv_regression computes cross-validated ridge regression. The
# cross-validation groupings were pre-set based on the fMRI design
# (9 categories left out, 9 folds). r is averaged over folds; the weights are
# computed over all data.
# NOTE(review): l2=0 presumably disables the ridge penalty -- confirm that is
# intended.
from utilsCM import cv_regression

regression_options = {'l2': 0}
weights, r = cv_regression(condition_features, subject, **regression_options)

# Load Word2Sense, extract AlexNet features for those labels, and use the previously computed weights to predict fMRI responses

In [None]:
# Load Word2Sense.
# Already preprocessed in MATLAB so that Wrd2Sns and THINGS overlap --> we
# have IMAGES, LABELS and SENSES.
import pandas as pd

pathtofile = '../Code-001/'
# Both tables are comma-separated text files kept in the same folder.
Wrd2Sense = pd.read_csv(f"{pathtofile}ThingsWrd2Sns.txt", sep=",")
ImgInfo = pd.read_csv(f"{pathtofile}KeptTHINGSInfo.txt", sep=",")

In [None]:
##Compute feature activations for THINGS dataset
import os.path
import csv
import pandas as pd

# Per-layer cache of the category-averaged features, kept in the working
# directory. One row per category: name in the first column, no header row.
things_csv = os.path.join(os.getcwd(), "Things_" + layer + ".csv")

new_condition_features = {}

if os.path.isfile(things_csv):
    # Read with the same layout the file is written with below (index in the
    # first column, no header) so the first category is not consumed as
    # column names.
    b = pd.read_csv(things_csv, sep=",", index_col=0, header=None)
else:
    ## ImageFolder must contain subfolders of images:
    image_dir = '/Users/cmagri1/OneDrive - Johns Hopkins/Project-Word2Sense//THINGSdataset/Main/images'
    print('Extracting new stimuli features')
    new_conditions = listdir(image_dir)
    # Feed the inputs to whichever device the model's weights live on.
    device = next(model.parameters()).device
    # Build the lookup once instead of scanning the column for every folder.
    word2sense_items = set(Wrd2Sense['item'])
    for c in tqdm(new_conditions):
        c_name = c.split('/')[-1]
        if c_name not in word2sense_items:
            continue  # keep only categories present in both Word2Sense and THINGS
        stimuli = listdir(c)
        #resize according to resolution and square the image
        stimuli = [image_to_tensor(s, resolution=resolutionval) for s in stimuli]
        stimuli = torch.stack(stimuli).to(device)
        with torch.no_grad():
            #average across the same category
            feats = model(stimuli).mean(dim=0).cpu().numpy()
        new_condition_features[c_name] = feats

    # Write the cache only when features were actually computed. Doing this
    # unconditionally overwrote an existing cache with an empty table.
    q = pd.DataFrame(new_condition_features).transpose()
    q.to_csv(things_csv, index=True, header=False)

In [None]:
# Reload the cached THINGS features: first column is the index, and the file
# has no header row.
b = pd.read_csv(
    f"Things_{layer}.csv",
    sep=",",
    index_col=0,
    header=None,
)

In [None]:
import numpy as np

# Compute the predicted activation for all the THINGS dataset objects as the
# dot product of the feature table and the transposed regression weights.
# Every row of b is used: the previous slice b[0:len(b)-1] silently dropped
# the last category, because Python slices exclude the end index.
ROIpred = np.dot(b.to_numpy(), weights.transpose())