In [1]:
#### Saliency notebook
#### Takes an image and visualizes its saliency prediction

import os
import sys
import io
import time

import torch
import torchvision
import torch.nn.functional as nnF
import torchvision.transforms.functional as F
import numpy as np
import pandas as pd
from PIL import Image

def preprocess_img(img, i, j, h, w):
    """Crop an image to a box and convert the crop to a tensor.

    The four box arguments are forwarded positionally to
    ``torchvision.transforms.functional.crop``, whose documented parameter
    order is (top, left, height, width). No resizing is applied.

    Args:
        img: Image to crop; this notebook passes a PIL image.
        i: Top coordinate of the crop box.
        j: Left coordinate of the crop box.
        h: Height of the crop box.
        w: Width of the crop box.

    Returns:
        The output of ``F.to_tensor`` applied to the cropped image.
    """
    cropped = F.crop(img, i, j, h, w)
    return F.to_tensor(cropped)

def normalize_imagenet(img):
    """Normalize an image tensor with fixed three-channel mean/std values.

    Args:
        img: Tensor to normalize. The three-element mean/std lists imply a
            three-channel input.

    Returns:
        The output of ``F.normalize`` for the given tensor.
    """
    # Per-channel statistics; presumably the usual ImageNet RGB values, as
    # the function name suggests.
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]
    return F.normalize(img, mean=channel_mean, std=channel_std)

# Run on the first CUDA device when one is available, otherwise on the CPU.
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Open the example image inside a context manager so the file handle is
# released, and force three RGB channels: normalize_imagenet supplies exactly
# three mean/std values, so a grayscale, RGBA or CMYK file would fail later.
with Image.open('example_images/gnochi_mirror.jpeg') as img_file:
    img = img_file.convert('RGB')
print(img.size)

# PIL reports size as (width, height), whereas preprocess_img takes the crop
# height before the width; unpack by name to keep the order explicit.
img_width, img_height = img.size

# Crop the full frame (no-op crop), add a leading batch axis and move the
# tensor to the selected device.
img_tensor = preprocess_img(img, 0, 0, img_height, img_width).unsqueeze(0).to(DEVICE)
print(img_tensor.shape)


(960, 1280)
torch.Size([1, 3, 1280, 960])


In [2]:
# Load VGG and extract features for perceptual loss
from utils import hook_model

def load_vgg():
    """Create the pretrained torchvision VGG-16 used by this notebook.

    Returns:
        The ``torchvision.models.vgg16(pretrained=True)`` model, switched to
        eval mode and moved to ``DEVICE``.
    """
    # NOTE(review): newer torchvision releases favor a `weights=` argument
    # over `pretrained=`; confirm the installed version before changing it.
    model = torchvision.models.vgg16(pretrained=True)
    # Module.eval() and Module.to() both return the module, so they chain.
    return model.eval().to(DEVICE)

# Load the pretrained VGG once for the rest of the notebook
vgg_model = load_vgg()

# Hook the model to reach its middle layers.
# NOTE(review): the meaning of the second argument (True) is defined in
# utils.hook_model, which is not visible here -- confirm before changing it.
vgg_hook, vgg_layers = hook_model(vgg_model, True)

# Forward pass to activate the hook. Only the hooked activations are read
# afterwards, so run it under no_grad to avoid building an autograd graph for
# the whole network. The model output itself is discarded (and not bound to
# `_`, which IPython uses for the last cell output).
with torch.no_grad():
    vgg_model(normalize_imagenet(img_tensor))

# Choose a layer from vgg_layers.keys()
f_layer = 'features-30:MaxPool2d'

# Extract the features captured for that layer and cast them to float32
feats = vgg_hook(f_layer).float()
print(feats.shape)


torch.Size([1, 512, 40, 30])
