In [12]:
import torch
import torch.nn as nn
from torch.nn.modules.conv import Conv2d
from torchvision import transforms

from tqdm import tqdm, trange

In [4]:
class IVSN(nn.Module):

    def __init__(self, model):
        super().__init__()
        self.features = nn.Sequential(*list(model.children())[:30]) #get first 30 layers of the model
        for param in self.features.parameters():
            param.requires_grad = False
            #freeze these layers
        
    def forward(self, x):
        x = self.features(x)
        return x

ConvSize, NumTemplates, Mylayer = 1, 512, 31
TotalTrials, targetsize, stimulisize = 600, 156, (676, 756)
MMconv = Conv2d(NumTemplates, 1, kernel_size = (ConvSize, ConvSize), stride = (1, 1), padding = (1, 1))

In [15]:
model_vgg = torch.hub.load('pytorch/vision:v0.10.0', 'vgg16', pretrained=True)
model_ivsn = IVSN(model_vgg)
model_ivsn

Using cache found in /Users/ishaan/.cache/torch/hub/pytorch_vision_v0.10.0


IVSN(
  (features): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace=True)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (6): ReLU(inplace=True)
      (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (8): ReLU(inplace=True)
      (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace=True)
      (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (13): ReLU(inplace=True)
      (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (15): ReLU(inplace=True)
      (16): 

In [6]:
from arrayDataset import ArrayDataset

input_images = ArrayDataset('data/array/gt_positions.csv', 'data/array/stimuli', 'data/array/target', target_size=targetsize)

In [18]:
import torch
from tqdm import trange

num_pics, size, image_size, shorter_side = len(input_images), 48, (320, 512), 128
IVSN_attention_map, scanpath = {}, {}
IVSN_res = list()

with torch.no_grad():
    for id in range(1):
        stimuli_img, target_img, bbox = input_images[id]
        
        # Repeat the input images 3 times to convert 1 channel to 3 channels (if necessary)
        stimuli_img = stimuli_img.repeat(1, 3, 1, 1)  # Repeat along the channel dimension
        target_img = target_img.repeat(1, 3, 1, 1)  # Repeat along the channel dimension
                
        # Resize the input images to 224x224 for model input
        stimuli_img_resized = transforms.Resize((320, 512))(stimuli_img)
        target_img_resized = transforms.Resize((320, 512))(target_img)
                
        # Get the output from the model (now both inputs are resized to 224x224)
        stimuli_output = model_ivsn(stimuli_img_resized)
        target_output = model_ivsn(target_img_resized)
        
        # Update MMconv weights with target output
        MMconv.weight = torch.nn.Parameter(target_output)
        
        # Get the attention map by applying MMconv to stimuli_output
        attention_IVSN = MMconv(stimuli_output)
        attention_IVSN = attention_IVSN.squeeze(0)  # Remove batch dimension
        
        # Resize the attention map back to the original size (756x676)
        attention_IVSN_resized = transforms.Resize((756, 676))(attention_IVSN)
        
        # Normalize the attention map
        mask_IVSN = torch.divide(attention_IVSN_resized, attention_IVSN_resized.max())
        
        # Print the attention map and its size
        print("Attention map shape after resizing:", attention_IVSN_resized.shape)
        print(mask_IVSN)  # You can print or visualize the normalized attention map



RuntimeError: mat1 and mat2 shapes cannot be multiplied (3584x7 and 25088x4096)