In [7]:
import torch
import torch.nn as nn
from torch.nn.modules.conv import Conv2d
from torchvision import transforms

from tqdm import tqdm, trange

In [8]:
class IVSN(nn.Module):
    """Frozen feature extractor built from the leading layers of a backbone.

    The first child module of ``model`` is sliced to its first 30 layers and
    wrapped in a fresh ``nn.Sequential``; the selected layers are frozen so
    they are never updated by an optimizer.

    Args:
        model: Network whose first child is an indexable ``nn.Sequential``
            (the notebook passes a torchvision VGG16, whose first child is
            its ``features`` stack).
    """

    def __init__(self, model):
        super().__init__()
        # Keep only the first 30 layers of the backbone's first child module.
        self.features = nn.Sequential(*list(model.children())[0][:30])
        # ``requires_grad_`` is a method: it has to be *called*. Assigning
        # ``False`` to it merely shadows the method on the tensor and leaves
        # ``requires_grad`` untouched, so nothing would be frozen.
        # The layers are shared with ``model``, so they are frozen there too.
        for param in self.features.parameters():
            param.requires_grad_(False)

    def forward(self, x):
        """Return the feature map produced by the truncated layer stack."""
        return self.features(x)

# Notebook-wide configuration values.
ConvSize = 1
NumTemplates = 512
Mylayer = 31
TotalTrials = 600
targetsize = (156, 156)
stimulisize = (676, 756)

# Matching layer as constructed here: NumTemplates (512) input channels,
# a single output channel, a ConvSize x ConvSize (1x1) kernel, unit stride
# and one pixel of zero padding on each side.
MMconv = Conv2d(
    in_channels=NumTemplates,
    out_channels=1,
    kernel_size=(ConvSize, ConvSize),
    stride=(1, 1),
    padding=(1, 1),
)

(1, 1)


In [15]:
from torchvision.models import VGG16_Weights, vgg16

# Build VGG16 straight from the installed torchvision instead of going through
# torch.hub with the 'pytorch/vision:v0.10.0' tag. The ``weights=`` enum API
# belongs to newer torchvision releases than that tag, so the hub call only
# worked when the installed package happened to shadow the hub checkout.
# Calling the constructor directly uses one consistent torchvision version
# and skips the hub repository fetch.
model_vgg = vgg16(weights=VGG16_Weights.DEFAULT)

# Wrap the pretrained backbone in the truncated feature extractor.
model_ivsn = IVSN(model_vgg)
model_ivsn

Using cache found in /Users/ishaan/.cache/torch/hub/pytorch_vision_v0.10.0


IVSN(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=

In [27]:
import importlib
import matplotlib.pyplot as plt
import arrayDataset

# Reload the local dataset module so edits to arrayDataset.py are picked up
# without restarting the kernel.
importlib.reload(arrayDataset)

# Dataset built from the ground-truth CSV plus the stimuli and target folders.
# Targets are sized to `targetsize`; the per-channel means/stds are passed
# through to the dataset for normalisation.
input_images = arrayDataset.ArrayDataset(
    'data/array/gt_positions.csv',
    'data/array/stimuli',
    'data/array/target',
    target_size=targetsize,
    normalize_means=[0.485, 0.456, 0.406],
    normalize_stds=[0.229, 0.224, 0.225],
)

# Optional visual sanity checks (uncomment to display the first sample):
#plt.imshow(input_images[0][0].numpy()[0]) #stimuli 676 x 756
#plt.imshow(input_images[0][1].numpy()[0]) #target 156 x 156

In [43]:
import torch
from tqdm import trange
from utils import arraySearchProcesswithPath

num_pics = len(input_images) #600 images
IVSN_attention_map, scanpath = {}, {}
IVSN_res = list()

with torch.no_grad():
    # Only the first sample is processed here; switch to range(num_pics) to
    # run the whole dataset. The index is deliberately not called `id`, which
    # would shadow the builtin for every later cell.
    for img_idx in range(1):
        stimuli_img, target_img, gpos = input_images[img_idx]

        # The backbone needs 3 input channels: replicate single-channel images.
        if stimuli_img.shape[0] == 1:
            stimuli_img = torch.cat((stimuli_img, stimuli_img, stimuli_img), 0)
        if target_img.shape[0] == 1:
            target_img = torch.cat((target_img, target_img, target_img), 0)

        # Add the batch dimension expected by the network.
        stimuli_batch = stimuli_img.unsqueeze(0) #(1, 3, 756, 676)
        target_batch = target_img.unsqueeze(0) #(1, 3, 156, 156)

        # Feature maps from the truncated backbone.
        stimuli_output = model_ivsn(stimuli_batch) #(1, 512, 47, 42)
        target_output = model_ivsn(target_batch) #(1, 512, 9, 9)

        # Use the target features as the convolution kernel over the stimulus
        # features. The functional form is stateless: it neither overwrites
        # MMconv.weight with a new grad-requiring Parameter on every image,
        # nor adds MMconv's randomly initialised bias to the map. Stride and
        # padding are still taken from MMconv so the geometry is unchanged.
        attention_IVSN = torch.nn.functional.conv2d(
            stimuli_output,
            target_output,
            bias=None,
            stride=MMconv.stride,
            padding=MMconv.padding,
        ) #(1, 1, 41, 36)
        attention_IVSN = attention_IVSN.squeeze(0)  #(1, 41, 36)

        # Upsample to the stimulus resolution, then scale so the peak is 1.
        mask_IVSN = transforms.Resize((stimuli_img.shape[1], stimuli_img.shape[2]))(attention_IVSN)
        mask_IVSN = torch.divide(mask_IVSN, mask_IVSN.max()) # (1, 756, 676)

        # Store an independent copy of the map for later inspection.
        IVSN_attention_map[img_idx] = mask_IVSN.clone().detach()

        # NOTE(review): the last recorded run of this cell ended with
        # "TypeError: unsupported operand type(s) for -: 'Tensor' and 'tuple'".
        # The traceback is not shown, so the origin is unconfirmed; check the
        # argument types arraySearchProcesswithPath expects (mask / gpos).
        IVSN_num, path = arraySearchProcesswithPath(mask_IVSN, gpos)
        scanpath[img_idx] = path
        IVSN_res.append(IVSN_num)

print(IVSN_res)



torch.Size([1, 756, 676])


TypeError: unsupported operand type(s) for -: 'Tensor' and 'tuple'

In [6]:
# Shape of the attention map stored for the first image.
# NOTE(review): the search cell stores the map after resizing it to the
# stimulus height/width, so the recorded (1, 41, 36) output appears to come
# from an earlier execution — re-run after the search cell to confirm.
IVSN_attention_map[0].shape

torch.Size([1, 41, 36])