<a href="https://colab.research.google.com/github/WiseCyril/33-js-concepts/blob/master/C4_W1_Lab_1_Neural_Style_Transfer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Ungraded Lab: Neural Style Transfer

This lab will demonstrate neural style transfer using the techniques discussed in class. You will revisit this again after Lesson 2 of this week's lecture.

### Imports

In [None]:
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

import tensorflow as tf

import IPython.display as display_obj
from random import randint

import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams['figure.figsize'] = (12,12)
mpl.rcParams['axes.grid'] = False

import numpy as np
import PIL.Image
import time
import functools
import os

## Download Images

Download images and choose a style image and a content image:

In [None]:
!wget  https://cdn.pixabay.com/photo/2018/07/14/15/27/cafe-3537801_1280.jpg
!wget  https://cdn.pixabay.com/photo/2017/02/28/23/00/swan-2107052_1280.jpg
!wget  https://i.dawn.com/large/2019/10/5db6a03a4c7e3.jpg
!wget  https://cdn.pixabay.com/photo/2015/09/22/12/21/rudolph-951494_1280.jpg
!wget https://cdn.pixabay.com/photo/2015/10/13/02/59/animals-985500_1280.jpg

## Visualize the input

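Define a helper function that converts an image tensor into a displayable PIL image. (The function that loads an image and limits its maximum dimension to 512 pixels is defined in the next section.)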

In [None]:
def tensor_to_image(tensor):
  """Convert an image tensor into a PIL image.

  args:
    tensor: an image of shape (height, width, channels), or a batch holding
      exactly one image, shape (1, height, width, channels)

  returns:
    The PIL image produced by tf.keras.preprocessing.image.array_to_img

  raises:
    ValueError: if a batched tensor holds more than one image
  """
  tensor = tf.convert_to_tensor(tensor)

  # The static rank is a plain Python int, so this is an ordinary boolean test
  # rather than an implicit conversion of a one-element tensor to bool.
  if tensor.shape.rank > 3:
    if tensor.shape[0] != 1:
      raise ValueError(
          "Expected a batch containing a single image, got batch size {}".format(tensor.shape[0]))
    # Drop the batch axis so that array_to_img receives a single image
    tensor = tensor[0]
  return tf.keras.preprocessing.image.array_to_img(tensor)

### Load and preprocess the image

This code is given to you.  
- You will use preprocess_image in your code later in this assignment.

In [None]:
def load_img(path_to_img, max_dim=512):
  """Load a JPEG file and resize it so that its longest side equals max_dim.

  args:
    path_to_img: path of the JPEG file to read
    max_dim: length in pixels of the longest side after resizing

  returns:
    A uint8 tensor with a leading batch axis: (1, height, width, channels)
  """
  image = tf.io.read_file(path_to_img)
  # Always decode to three channels so that grayscale JPEGs still produce the
  # RGB layout that the VGG19 feature extractor is fed later on.
  image = tf.image.decode_jpeg(image, channels=3)
  image = tf.image.convert_image_dtype(image, tf.float32)

  # (height, width) as floats, so the scale factor can be fractional
  shape = tf.cast(tf.shape(image)[:-1], tf.float32)
  long_dim = tf.reduce_max(shape)
  scale = max_dim / long_dim

  new_shape = tf.cast(shape * scale, tf.int32)

  image = tf.image.resize(image, new_shape)
  # Add the batch axis
  image = image[tf.newaxis, :]
  image = tf.image.convert_image_dtype(image, tf.uint8)
  return image

def preprocess_image(image):
    """Cast an image to float32 and apply the Keras VGG19 input preprocessing.

    args:
      image: image tensor (any numeric dtype)

    returns:
      The result of tf.keras.applications.vgg19.preprocess_input on the
      float32 version of the image
    """
    as_float = tf.cast(image, dtype=tf.float32)
    return tf.keras.applications.vgg19.preprocess_input(as_float)


Create a simple function to display an image:

In [None]:
def imshow(image, title=None):
  """Draw an image on the current matplotlib axes.

  args:
    image: image with a .shape attribute; when it has more than three
      dimensions the leading axis is squeezed away first
    title: optional title; nothing is set when it is falsy
  """
  has_batch_axis = len(image.shape) > 3
  to_draw = tf.squeeze(image, axis=0) if has_batch_axis else image

  plt.imshow(to_draw)
  if not title:
    return
  plt.title(title)
    
    
def show_images_with_objects(images, titles=None):
  """Show several images side by side in a single row, each with its title.

  args:
    images: sequence of images accepted by imshow()
    titles: sequence of titles, one per image; None is treated as an
      empty sequence

  returns:
    None. Nothing is drawn when the number of titles does not match the
    number of images.
  """
  # None instead of a mutable [] default, which would be one list object
  # shared by every call.
  titles = [] if titles is None else titles

  if len(images) != len(titles):
    return

  plt.figure(figsize=(20, 12))
  for position, (image, title) in enumerate(zip(images, titles), start=1):
    plt.subplot(1, len(images), position)
    # Hide the axis ticks; only the picture itself is of interest
    plt.xticks([])
    plt.yticks([])
    imshow(image, title)
    
    
def load_images(content_path, style_path):
  """Load the content image and the style image with load_img().

  args:
    content_path: path of the content image
    style_path: path of the style image

  returns:
    (content_image, style_image)
  """
  content_image = load_img(f"{content_path}")
  style_image = load_img(f"{style_path}")
  return content_image, style_image

## Build the model 

First, download the VGG19 model so that you can inspect the layers that are available for you to use.

In [None]:
# Throwaway VGG19 instance built with default arguments, used only to print
# its layer summary so that layer names can be picked below.
tmp_vgg = tf.keras.applications.vgg19.VGG19()
tmp_vgg.summary()

Choose intermediate layers from the network to represent the style and content of the image:

- For the content layer, please use the second convolutional layer of the last convolutional block (just one layer)
- For the style layers, please use the first layer of each convolutional block.

In [None]:
# Content layer from which the content feature maps are pulled
# (second convolution of the last convolutional block)
content_layers = ['block5_conv2'] 

# Style layers of interest: the first convolution of each block
style_layers = ['block1_conv1', 
                'block2_conv1', 
                'block3_conv1', 
                'block4_conv1', 
                'block5_conv1'] 

# Combine the two lists with the style layers first; the feature functions
# below slice the model outputs at num_style_layers and rely on this order.
output_layers = style_layers + content_layers 

# Layer counts, used to slice the model outputs and to average the losses
num_content_layers = len(content_layers)
num_style_layers = len(style_layers)
num_output_layers = len(output_layers)

Define your model to take the same input as the standard vgg19 model, and output just the selected content and style layers.

In [None]:
def vgg_model(layer_names):
  """Build a frozen VGG19 feature extractor.

  args:
    layer_names: list of strings naming the VGG19 layers whose outputs
      should be returned

  returns:
    A tf.keras.Model that takes the VGG19 input and returns one output per
    entry of layer_names, in the same order.
  """
  # ImageNet-pretrained VGG19 without the classification head
  base = tf.keras.applications.vgg19.VGG19(include_top=False, weights='imagenet')

  # The network is only a feature extractor: mark its weights as not trainable
  base.trainable = False

  # Look up each requested layer and keep its output tensor
  selected_outputs = []
  for layer_name in layer_names:
    selected_outputs.append(base.get_layer(layer_name).output)

  return tf.keras.Model(inputs=base.input, outputs=selected_outputs)

Create an instance of the content and style model using the function that you just defined

In [None]:
# Module-level feature extractor used by the feature functions below;
# its outputs follow the order of output_layers (style layers, then content).
vgg = vgg_model(output_layers) 

### Calculate style loss

The style loss is the average of the squared differences between the features and targets.

In [None]:
def get_style_loss(features, targets):
  """Mean squared difference between style features and style targets.

  args:
    features: tensor of style features for the generated image
    targets: tensor of style features for the style image, with a shape
      compatible with `features`

  returns:
    Scalar tensor: the mean of the element-wise squared differences
  """
  squared_error = tf.square(features - targets)
  return tf.reduce_mean(squared_error)

### Calculate content loss

Calculate the content loss as the average squared difference between the features and targets.

In [None]:
def get_content_loss(features, targets):
  """Mean squared difference between content features and content targets.

  args:
    features: tensor of content features for the generated image
    targets: tensor of content features for the content image, with a shape
      compatible with `features`

  returns:
    Scalar tensor: the mean of the element-wise squared differences
  """
  squared_error = tf.square(features - targets)
  return tf.reduce_mean(squared_error)

### Calculate the gram matrix

Use `tf.linalg.einsum` to calculate the gram matrix for an input tensor.
- In addition, calculate the scaling factor `num_locations` and divide the gram matrix calculation by `num_locations`.

$$ \text{num locations} = height \times width $$

In [None]:
def gram_matrix(input_tensor):
  """Compute the gram matrix of a feature map, averaged over locations.

  args:
    input_tensor: tensor of shape (batch, height, width, channels)

  return:
    scaled_gram: tensor of shape (batch, channels, channels); the channel by
      channel products summed over all positions, divided by height * width
  """
  # Number of spatial positions, as float32 so it can divide the gram matrix
  dims = tf.shape(input_tensor)
  num_locations = tf.cast(dims[1] * dims[2], tf.float32)

  # Sum over both spatial axes (i, j) of the products of channel pairs (c, d)
  gram = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)

  return gram / num_locations

### Get the style image features

Given the style image as input, you'll get the style features of the custom vgg model that you just created using `vgg_model()`.
- You'll first preprocess the image using the given `preprocess_image` function.
- You'll then get the outputs of the vgg model.
- From the outputs, just get the style feature layers and not the content feature layer.

You can run the following code to check the order of the layers in your custom vgg model:

In [None]:
# Collect the output tensor of every layer of the custom model
tmp_layer_list = [layer.output for layer in vgg.layers]
# Bare expression so that the notebook prints the list as the cell result
tmp_layer_list

- For each style layer, calculate the gram matrix.  Store these results in a list and return it.

In [None]:
def get_style_image_features(image):
  """Compute the style representation of an image.

  args:
    image: an input image

  return:
    gram_style_features: list with one gram matrix per style layer
  """
  # Preprocess, then run the image through the custom VGG model
  outputs = vgg(preprocess_image(image))

  # The first num_style_layers outputs are the style layers; turn each one
  # into its gram matrix.
  gram_style_features = []
  for style_layer in outputs[:num_style_layers]:
    gram_style_features.append(gram_matrix(style_layer))

  return gram_style_features

### Get content image features

Given the content image as input, you'll get the content features (not the style features) of the content image.
- You can follow a similar process as you did with `get_style_image_features`.
- For the content image, you will not calculate the gram matrix of these content features.

In [None]:
def get_content_image_features(image):
  """Compute the content representation of an image.

  args:
    image: an input image

  return:
    The model outputs that follow the style layers, i.e. the content layer
    outputs. No gram matrix is computed for them.
  """
  # Preprocess, then run the image through the custom VGG model
  all_outputs = vgg(preprocess_image(image))

  # Skip the leading style layers; what remains are the content layers
  content_outputs = all_outputs[num_style_layers:]
  return content_outputs

### Calculate the style and content loss



In [None]:
def get_style_content_loss(style_targets, style_outputs, content_targets, content_outputs, style_weight, content_weight):
  """Combine the style and content losses into a single total loss.

  args:
    style_targets: style features of the style image, one per style layer
    style_outputs: style features of the generated image, one per style layer
    content_targets: content features of the content image
    content_outputs: content features of the generated image
    style_weight: multiplier applied to the summed style loss
    content_weight: multiplier applied to the summed content loss

  returns:
    total_loss: weighted style loss plus weighted content loss, each divided
      by its number of layers
  """
  per_layer_style = [get_style_loss(output, target)
                     for output, target in zip(style_outputs, style_targets)]
  per_layer_content = [get_content_loss(output, target)
                       for output, target in zip(content_outputs, content_targets)]

  # Sum over layers, apply the weight, then average over the layer count
  weighted_style = tf.add_n(per_layer_style) * style_weight / num_style_layers
  weighted_content = tf.add_n(per_layer_content) * content_weight / num_content_layers

  return weighted_style + weighted_content

### Clip the image

This is given to you

In [None]:
def clip_0_1(image):
  """Clamp every value of the image into the range [0.0, 1.0]."""
  lower, upper = 0.0, 1.0
  return tf.clip_by_value(image, clip_value_min=lower, clip_value_max=upper)

### Calculate gradients


In [None]:
def calculate_gradients(image, content_targets, style_targets, style_weight, content_weight):
  """Compute the gradients of the style/content loss for the generated image.

  args:
    image: the generated image with values in [0, 1]; fit_style_transfer()
      passes a tf.Variable here
    content_targets: content features of the content image
    style_targets: style features (gram matrices) of the style image
    style_weight: weight of the style loss
    content_weight: weight of the content loss

  returns:
    gradients: gradients of the total loss with respect to the image scaled
      to [0, 255], the tensor that is fed to the feature extractor
  """
  with tf.GradientTape() as tape:

    # Scale up the image from [0, 1] to [0, 255]. A separate name keeps the
    # `image` argument intact and makes the differentiation target explicit.
    scaled_image = image * 255

    # Style and content features of the generated image
    style_features = get_style_image_features(scaled_image)
    content_features = get_content_image_features(scaled_image)

    loss = get_style_content_loss(style_targets, style_features, content_targets, content_features, style_weight, content_weight)

  # The gradient is taken with respect to the scaled tensor, not the
  # unscaled input.
  gradients = tape.gradient(loss, scaled_image)

  return gradients

### Update the image with the style


In [None]:
def update_image_with_style(image, content_targets, style_targets, optimizer, style_weight, content_weight):
  """Run one optimisation step on the generated image, in place.

  args:
    image: the generated image; it is updated through optimizer.apply_gradients
      and image.assign
    content_targets: content features of the content image
    style_targets: style features of the style image
    optimizer: optimizer whose apply_gradients() performs the update
    style_weight: weight of the style loss
    content_weight: weight of the content loss
  """
  grads = calculate_gradients(image, content_targets, style_targets, style_weight, content_weight)

  # One (gradient, variable) pair: the image itself is what gets optimised
  grads_and_vars = [(grads, image)]
  optimizer.apply_gradients(grads_and_vars)

  # Bring the updated pixels back into the [0, 1] range
  clipped = clip_0_1(image)
  image.assign(clipped)

## Train Model

In [None]:
def fit_style_transfer(input_image, style_image, optimizer, epochs=1, steps_per_epoch=1, style_weight = 0.01, with_regularization=False):
  """Optimise a copy of the content image so that it takes on the style image's style.

  args:
    input_image: the content image; it supplies the content targets and the
      starting point of the generated image
    style_image: the image that supplies the style targets
    optimizer: optimizer used to update the generated image
    epochs: number of epochs
    steps_per_epoch: number of update steps in each epoch
    style_weight: weight of the style loss
    with_regularization: when True, the gradients include the total variation
      term; this needs the calculate_gradients() variant that accepts a
      `with_regularization` argument

  returns:
    (generated image as a tf.Variable, list of tf.Variable snapshots taken
    before training, after every 10th step of an epoch and after each epoch)
  """
  images = []
  start = time.time()

  step = 0

  content_weight = 1e2

  # Style targets come from the style image, content targets from the content image
  style_targets = get_style_image_features(style_image)
  content_targets = get_content_image_features(input_image)

  # The generated image starts as a float copy of the content image
  input_image = tf.image.convert_image_dtype(input_image, dtype=tf.float32)
  input_image = tf.Variable(input_image)
  # Wrapping in a new tf.Variable stores a snapshot instead of the live variable
  images.append(tf.Variable(input_image))

  for _ in range(epochs):
    for m in range(steps_per_epoch):
      step += 1

      if with_regularization:
        # update_image_with_style() does not take a regularization switch, so
        # the same three steps run here with the flag forwarded; without this
        # the flag would be silently ignored.
        gradients = calculate_gradients(input_image, content_targets, style_targets,
                                        style_weight, content_weight,
                                        with_regularization=True)
        optimizer.apply_gradients([(gradients, input_image)])
        input_image.assign(clip_0_1(input_image))
      else:
        update_image_with_style(input_image, content_targets, style_targets, optimizer, style_weight, content_weight)

      print(".", end='')
      if (m + 1) % 10 == 0:
        images.append(tf.Variable(input_image))

    # Replace the progress dots with the current image at the end of each epoch
    display_obj.clear_output(wait=True)
    display_image = tensor_to_image(input_image)
    display_obj.display(display_image)
    images.append(tf.Variable(input_image))
    print("Train step: {}".format(step))
  end = time.time()
  print("Total time: {:.1f}".format(end-start))

  return input_image, images

### Load some images to try it out!

In [None]:
# First argument is the content image file, second is the style image file
content_image, style_image = load_images("swan-2107052_1280.jpg", "animals-985500_1280.jpg")

In [None]:
# Style weight handed to fit_style_transfer (its content weight is fixed at 1e2)
weight =  0.01 
adam = tf.optimizers.Adam(learning_rate=0.02, beta_1=0.99, epsilon=1e-1)
# 10 epochs of 100 steps each; returns the final image and the stored snapshots
stylized_image, display_images = fit_style_transfer(input_image=content_image, style_image=style_image, optimizer=adam, epochs=10, steps_per_epoch=100, style_weight=weight)

In [None]:
# Display Utilities

import imageio
from IPython.display import display as display_fn
from IPython.display import Image

def display_gif(GIF_PATH):
  """Read the file at GIF_PATH and display its bytes inline in the notebook.

  args:
    GIF_PATH: path of the file to display
  """
  with open(GIF_PATH,'rb') as gif_file:
    raw_bytes = gif_file.read()
  # NOTE(review): the bytes are declared as 'png' although the file is written
  # as a GIF by create_gif(); presumably a workaround for front ends that do
  # not embed GIFs directly - confirm before changing.
  display_fn(Image(data=raw_bytes, format='png'))

def create_gif(images):
  """Write the images as an animated GIF under /content and return its path.

  args:
    images: sequence of frames passed to imageio.mimsave

  returns:
    GIF_PATH: path of the written file, named after a random integer
      between 0 and 10000
  """
  file_stem = randint(0, 10000)
  GIF_PATH = "/content/{}.gif".format(file_stem)
  imageio.mimsave(GIF_PATH, images, fps=1)
  return GIF_PATH


In [None]:
# Display GIF of Intermediate Outputs
# Each snapshot has a leading batch axis of size 1; drop it to get one frame per image
gif_images = [np.squeeze(image.numpy(), axis=0) for image in display_images]
gif_path = create_gif(gif_images)

In [None]:
# Show the GIF written by create_gif() in the previous cell
display_gif(gif_path)

## End of Lesson 1 ungraded lab

Please go back to the classroom and watch lesson 2 regarding the total variation loss. Then you can continue on to the next section below.

## Total variation loss

One downside to this basic implementation is that it produces a lot of high frequency artifacts. Decrease these using an explicit regularization term on the high frequency components of the image. In style transfer, this is often called the *total variation loss*. Let's define the `calculate_gradients()` function again but this time with a regularization parameter to compute the total variation loss.

In [None]:
def calculate_gradients(image, content_targets, style_targets, style_weight, content_weight, with_regularization=False, total_variation_weight=160):
  """Compute the gradients of the loss, optionally with total variation loss.

  args:
    image: the generated image with values in [0, 1]; fit_style_transfer()
      passes a tf.Variable here
    content_targets: content features of the content image
    style_targets: style features (gram matrices) of the style image
    style_weight: weight of the style loss
    content_weight: weight of the content loss
    with_regularization: when True, add the total variation loss
    total_variation_weight: multiplier of the total variation loss; only
      used when with_regularization is True

  returns:
    gradients: gradients of the loss with respect to the image scaled to
      [0, 255], the tensor that is fed to the feature extractor
  """
  with tf.GradientTape() as tape:

    # Scale up the image from [0, 1] to [0, 255]. A separate name keeps the
    # `image` argument intact and makes the differentiation target explicit.
    scaled_image = image * 255

    # Style and content features of the generated image
    style_features = get_style_image_features(scaled_image)
    content_features = get_content_image_features(scaled_image)

    loss = get_style_content_loss(style_targets, style_features, content_targets, content_features, style_weight, content_weight)

    # Penalise high frequency components; the total variation is measured
    # on the scaled image.
    if with_regularization:
      loss += total_variation_weight*tf.image.total_variation(scaled_image)

  # The gradient is taken with respect to the scaled tensor, not the
  # unscaled input.
  gradients = tape.gradient(loss, scaled_image)

  return gradients

In [None]:
# Plot Utilities
def high_pass_x_y(image):
  """Differences between neighbouring pixels along axis 2 and along axis 1.

  args:
    image: 4-dimensional array or tensor; the callers in this file pass
      (batch, height, width, channels) images

  returns:
    (x_var, y_var): x_var is each element minus its predecessor along
      axis 2, y_var is the same along axis 1
  """
  shifted_x, base_x = image[:, :, 1:, :], image[:, :, :-1, :]
  shifted_y, base_y = image[:, 1:, :, :], image[:, :-1, :, :]

  return shifted_x - base_x, shifted_y - base_y

def plot_deltas_for_single_image(x_deltas, y_deltas, name="Original", row=1):
  """Plot the y and x deltas of one image next to each other in a new figure.

  args:
    x_deltas: deltas along the x direction, as returned by high_pass_x_y()
    y_deltas: deltas along the y direction, as returned by high_pass_x_y()
    name: label appended to both panel titles
    row: number of rows of the subplot grid
  """
  plt.figure(figsize=(14,10))

  # (subplot position, data, title template); the y deltas go in the left
  # panel and the x deltas in the right one
  panels = (
      (1, y_deltas, "Horizontal Deltas: {}"),
      (2, x_deltas, "Vertical Deltas: {}"),
  )
  for position, deltas, title_template in panels:
    plt.subplot(row, 2, position)
    plt.yticks([])
    plt.xticks([])
    # Double the deltas and shift them by 0.5 before clipping to [0, 1]
    imshow(clip_0_1(2*deltas+0.5), title_template.format(name))

def plot_deltas(original_image_deltas, stylized_image_deltas):
  """Plot the deltas of the original image and of the stylized image.

  args:
    original_image_deltas: (x_deltas, y_deltas) of the original image
    stylized_image_deltas: (x_deltas, y_deltas) of the stylized image
  """
  # Unpack both pairs before anything is drawn
  (orig_dx, orig_dy), (styled_dx, styled_dy) = original_image_deltas, stylized_image_deltas

  plot_deltas_for_single_image(orig_dx, orig_dy, name="Original")
  plot_deltas_for_single_image(styled_dx, styled_dy, name="Stylized Image", row=2)

In [None]:
# Display Frequency Variations

# The content image is converted to float32 before differencing, the same
# conversion fit_style_transfer applies to it; stylized_image is used as returned.
original_x_deltas, original_y_deltas = high_pass_x_y(tf.image.convert_image_dtype(content_image, dtype=tf.float32))
stylized_image_x_deltas, stylized_image_y_deltas = high_pass_x_y(stylized_image)

plot_deltas((original_x_deltas, original_y_deltas), (stylized_image_x_deltas, stylized_image_y_deltas))

## Re-run the optimization

Choose a style weight and re-run the optimization, this time requesting the total variation loss with `with_regularization=True`:

In [None]:
# Style weight for the regularized run (passed as style_weight below)
variation_model_weight =   0.01

# Use a fresh optimizer for this run: `adam` already holds state from
# optimising a different image variable in the first run. The hyperparameters
# are the same as for the first run.
adam_regularized = tf.optimizers.Adam(learning_rate=0.02, beta_1=0.99, epsilon=1e-1)

stylized_image1, display_images1 = fit_style_transfer(input_image=content_image, style_image=style_image, optimizer=adam_regularized, epochs=10, steps_per_epoch=100, with_regularization=True, style_weight=variation_model_weight)

In [None]:
# Display GIF

# Each snapshot has a leading batch axis of size 1; drop it to get one frame per image
gif_images1 = [np.squeeze(image.numpy(), axis=0) for image in display_images1]
gif_path1 = create_gif(gif_images1)
display_gif(gif_path1)

In [None]:
# Display Frequency Variations

# Same comparison as before, now against the image from the second run (stylized_image1)
original_x_deltas, original_y_deltas = high_pass_x_y(tf.image.convert_image_dtype(content_image, dtype=tf.float32))
stylized_image_x_deltas, stylized_image_y_deltas = high_pass_x_y(stylized_image1)

plot_deltas((original_x_deltas, original_y_deltas), (stylized_image_x_deltas, stylized_image_y_deltas))

In [None]:
# Style image, content image and the result of the first run, side by side
show_images_with_objects([style_image, content_image, stylized_image], titles=['Style Image', 'Content Image', 'Stylized Image'])

In [None]:
# Style image, content image and the result of the second run, side by side
show_images_with_objects([style_image, content_image, stylized_image1], titles=['Style Image', 'Content Image', 'Stylized Image'])