In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
from torchvision import transforms
import torchvision

import matplotlib.pyplot as plt
import random
import numpy as np
import copy
import time
import os
import cv2
from PIL import Image
from torchvision.models import vgg19

# controllare

# Load data

In [2]:
def set_seed(seed, use_gpu = True):
    """
    Set SEED for PyTorch reproducibility
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if use_gpu:
        torch.cuda.manual_seed_all(seed)
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

SEED = 44

USE_SEED = False

if USE_SEED:
    set_seed(SEED, torch.cuda.is_available())

In [29]:
def load_image(image_path, device, output_size=None):
    """Loads an image by transforming it into a tensor."""
    img = Image.open(image_path)

    output_dim = None
    if output_size is None:
        output_dim = (img.size[1], img.size[0])
    elif isinstance(output_size, int):
        output_dim = (output_size, output_size)
    elif isinstance(output_size, tuple):
        if (len(output_size) == 2) and isinstance(output_size[0], int) and isinstance(output_size[1], int):
            output_dim = output_size
    else:
        raise ValueError("ERROR: output_size must be an integer or a 2-tuple of (height, width) if provided.")

    torch_loader = transforms.Compose(
        [
            transforms.Resize(output_dim),
            transforms.ToTensor()
        ]
    )
    
    img_tensor = torch_loader(img).unsqueeze(0)
    return img_tensor.to(device)

In [30]:
def image_style_transfer(config):
    """Implements neural style transfer on a content image using a style image, applying provided configuration."""
    ...
    
    # load content and style images
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    output_size = config.get('output_image_size')
    if output_size is not None:
        if len(output_size) > 1: 
            output_size = tuple(output_size)
        else:
            output_size = output_size[0]

    content_tensor = load_image(content_path, device, output_size=output_size)
    output_size = (content_tensor.shape[2], content_tensor.shape[3])
    style_tensor = load_image(style_path, device, output_size=output_size)

In [65]:
content_path = "/home/gloria/Scrivania/Vision_and_cognitive_system/content_style/content.jpg"
style_path = "/home/gloria/Scrivania/Vision_and_cognitive_system/content_style/style1.jpg"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
output_size = 32

content_tensor = load_image(content_path, device, output_size=output_size)
#output_size = (content_tensor.shape[2], content_tensor.shape[3])
style_tensor = load_image(style_path, device, output_size=output_size)

In [69]:
content_tensor.shape
style_tensor.shape

torch.Size([3, 32, 32])

In [25]:
train_mean = style_tensor.mean(axis=(0,2,3)) 
train_std = content_tensor.std(axis=(0,2,3))

print("-----  TRAIN NORMALIZATION VALUES  -----")
print(f"Mean: {train_mean}")
print(f"Standard Deviation: {train_std}")

-----  TRAIN NORMALIZATION VALUES  -----
Mean: tensor([0.5936, 0.5965, 0.5478])
Standard Deviation: tensor([0.2215, 0.2203, 0.2530])


# VGG model

In [37]:
%%capture
vgg19 = torchvision.models.vgg19(pretrained = False)

In [38]:
print(vgg19)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [51]:
class VGG19(nn.Module):
    def __init__(self):
        super(VGG19, self).__init__()

        self.chosen_features = {0: 'conv1_1', 5: 'conv2_1', 10: 'conv3_1', 19: 'conv4_1', 28: 'conv5_1'}
        self.vgg = torchvision.models.vgg19(pretrained=True).features[:29]
        
    def forward(self, x):
        feature_maps = dict()
        for idx, layer in enumerate(self.vgg):
            x = layer(x)
            if idx in self.chosen_features.keys():
                feature_maps[self.chosen_features[idx]] = x
        
        return feature_maps

#load the model
vgg = VGG19().to(device).eval()

In [52]:
vgg

VGG19(
  (vgg): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding

# Losses

In [62]:
def get_content_loss(content_original, content_current):
    #return torch.mean((content_original-content_current)**2)
    return torch.nn.MSELoss(reduction='mean')(content_original, content_current)

In [96]:
def gram_matrix(x, should_normalize=True):
    (ch, h, w) = x.size()
    features = x.view(ch, w * h)
    features_t = features.t()
    gram = features.mm(features_t)
    if should_normalize:
        gram /= ch * h * w
    return gram

In [97]:
gram_matrix1 = gram_matrix(style_tensor, should_normalize=True)
gram_matrix1

tensor([[0.1309, 0.1274, 0.1070],
        [0.1274, 0.1301, 0.1151],
        [0.1070, 0.1151, 0.1123]])

In [None]:
style_loss = 0.0
    current_style_representation = [utils.gram_matrix(x) for cnt, x in enumerate(current_set_of_feature_maps) if cnt in style_feature_maps_indices]
    for gram_gt, gram_hat in zip(target_style_representation, current_style_representation):
        style_loss += torch.nn.MSELoss(reduction='sum')(gram_gt[0], gram_hat[0])
    style_loss /= len(target_style_representation)