# Neural Style Transfer

First we begin by importing all the necessary packages

In [3]:
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.optim as optim
from torchvision import transforms, models

RESULT_PATH = '/home/dell-pc/Desktop/challenge-master/Model/'
# change the above accordingly. Here the pretrained model is downloaded and final image is saved

Next are a few modules to load image, get features and computing losses. We shall use the VGG19 pretrained architecture for feature extraction and perform backpropogation on the target image pixels 

In [4]:
def load_image(img_path, max_size=400):
  image = Image.open(img_path).convert('RGB')  
  
  if max(image.size) > max_size:
    size = max_size
  else:
    size = max(image.size)

  in_transform = transforms.Compose([
    transforms.Resize((size, int(1.5*size))),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])
  
  image = in_transform(image)[:3, :, :].unsqueeze(0)
  
  return image

In [5]:

def get_features(image, model, layers=None):
  if layers is None:
    layers = {'0': 'conv1_1','5': 'conv2_1',
              '10': 'conv3_1',
              '19': 'conv4_1',
              '21': 'conv4_2',  
              '28': 'conv5_1'}
  features = {}
  x = image
  for name, layer in enumerate(model.features):
    x = layer(x)
    if str(name) in layers:
      features[layers[str(name)]] = x
  
  return features

In [6]:
def gram_matrix(tensor):
  _, n_filters, h, w = tensor.size()
  tensor = tensor.view(n_filters, h * w)
  gram = torch.mm(tensor, tensor.t())
  
  return gram

In [7]:
def im_convert(tensor):
  image = tensor.to("cpu").clone().detach()
  image = image.numpy().squeeze()
  image = image.transpose(1, 2, 0)
  image = image * np.array((0.229, 0.224, 0.225)) + np.array(
    (0.485, 0.456, 0.406))
  image = image.clip(0, 1)
  
  return image


In [8]:
def tv_loss(img):

    w_variance = torch.sum(torch.pow(img[:,:,:,:-1] - img[:,:,:,1:], 2))
    h_variance = torch.sum(torch.pow(img[:,:,:-1,:] - img[:,:,1:,:], 2))
    loss = h_variance + w_variance
    return loss

Now loading the pretrained model and computing features

In [20]:
# torch.utils.model_zoo.load_url('https://download.pytorch.org/models/vgg19-dcbb9e9d.pth', model_dir=RESULT_PATH)
vgg = models.vgg19()
vgg.load_state_dict(torch.load(RESULT_PATH + '/vgg19-dcbb9e9d.pth'))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg.to(device).eval()
content = load_image('japanese_garden.jpg').to(device)
style = load_image('picasso_selfportrait.jpg').to(device)
content_features = get_features(content, vgg)
style_features = get_features(style, vgg)

Freezing weights as backprop is on image pixels

In [21]:
for param in vgg.parameters():
  param.requires_grad_(False)


Using average pool to replace max pool to ensure smoothness in final image

In [22]:
for i, layer in enumerate(vgg.features):
  if isinstance(layer, torch.nn.MaxPool2d):
    vgg.features[i] = torch.nn.AvgPool2d(kernel_size=2, stride=2, padding=0)

In [25]:
style_grams = {
  layer: gram_matrix(style_features[layer]) for layer in style_features}
target = torch.randn_like(content).requires_grad_(True).to(device)
# for various weightage for style with lower layers getting more weights
style_weights = {'conv1_1': 0.75,
                 'conv2_1': 0.5,
                 'conv3_1': 0.2,
                 'conv4_1': 0.2,
                 'conv5_1': 0.2}

# these weights are for their contribution in final loss function
content_weight = 1e2
style_weight = 1
variation_weight = 1e-4
optimizer = optim.Adam([target], lr=0.01)

In [None]:
for i in range(1, 401):
  
  optimizer.zero_grad()
  target_features = get_features(target, vgg)
  
  content_loss = torch.mean((target_features['conv4_2'] -
                             content_features['conv4_2']) ** 2)	
  variation_loss = tv_loss(target)
  
  style_loss = 0
  for layer in style_weights:
    target_feature = target_features[layer]
    target_gram = gram_matrix(target_feature)
    _, d, h, w = target_feature.shape
    style_gram = style_grams[layer]
    layer_style_loss = style_weights[layer] * torch.mean(
      (target_gram - style_gram) ** 2)
    style_loss += layer_style_loss / (d * h * w)

    total_loss = content_weight * content_loss + style_weight * style_loss + variation_weight*variation_loss
    total_loss.backward(retain_graph=True)
    optimizer.step()

  if i % 10 == 0:
    total_loss_rounded = round(total_loss.item(), 2)
    tv_fraction = round(
      variation_weight*variation_loss.item()/total_loss.item(), 2)
    content_fraction = round(
      content_weight*content_loss.item()/total_loss.item(), 2)
    style_fraction = round(
      style_weight*style_loss.item()/total_loss.item(), 2)
    print('Iteration {}, Total loss: {} - (content: {}, style {}, variation {})'.format(
      i,total_loss_rounded, content_fraction, style_fraction,tv_fraction))
      
final_img = im_convert(target)
fig = plt.figure()
plt.imshow(final_img)
plt.axis('off')
plt.savefig(RESULT_PATH+'final.png')

Iteration 10, Total loss: 1095.49 - (content: 0.59, style 0.25, variation 0.16)
Iteration 20, Total loss: 804.37 - (content: 0.58, style 0.27, variation 0.15)
Iteration 30, Total loss: 626.42 - (content: 0.56, style 0.3, variation 0.14)
Iteration 40, Total loss: 524.41 - (content: 0.55, style 0.32, variation 0.13)
Iteration 50, Total loss: 461.52 - (content: 0.55, style 0.33, variation 0.12)
Iteration 60, Total loss: 417.18 - (content: 0.54, style 0.34, variation 0.12)
Iteration 70, Total loss: 385.04 - (content: 0.54, style 0.34, variation 0.12)
Iteration 80, Total loss: 360.33 - (content: 0.54, style 0.35, variation 0.12)
Iteration 90, Total loss: 341.51 - (content: 0.54, style 0.35, variation 0.12)
Iteration 100, Total loss: 326.91 - (content: 0.53, style 0.35, variation 0.12)
Iteration 110, Total loss: 315.27 - (content: 0.53, style 0.35, variation 0.12)
Iteration 120, Total loss: 306.09 - (content: 0.53, style 0.36, variation 0.12)
Iteration 130, Total loss: 298.04 - (content: 0.5

References:
1. A Neural Algorithm of Artistic Style by Gatys et al.(https://arxiv.org/pdf/1508.06576.pdf)
2. https://www.researchgate.net/figure/Details-on-the-VGG19-architecture-For-each-layer-number-of-filters-parameters-and_tbl1_314237915

