<small>
Copyright (c) 2017 Andrew Glassner

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
</small>



# Deep Learning From Basics to Practice
## by Andrew Glassner, https://dlbasics.com, http://glassner.com
------
## Chapter 27: Applications
### Notebook 4: Style Transfer

This notebook is provided as a “behind-the-scenes” look at code used to make some of the figures in this chapter. It is still in the hacked-together form used to develop the figures, and is only lightly commented.

### How to run this code:
- Find the cell marked Constants. 
- Set the variables there - in particular, set the input and output files
- Save the notebook
- Choose the Kernel menu, then Restart & Run All
- Wait a while!

### About this code:
This notebook is a minor restructuring of code from
https://github.com/titu1994/Neural-Style-Transfer
by Somshubra Majumdar (titu1994).

See License E in LICENSE.txt

In [None]:

from scipy.misc import imread, imresize, imsave, fromimage, toimage
from scipy.optimize import fmin_l_bfgs_b
import numpy as np
import time
import argparse
import warnings

from keras.models import Model
from keras.layers import Input
from keras.layers.convolutional import Convolution2D, AveragePooling2D, MaxPooling2D
from keras import backend as K
from keras.utils.data_utils import get_file
from keras.utils.layer_utils import convert_all_kernels_in_model

# Just in case the Keras defaults aren't as we expect
K.set_image_data_format('channels_last')

def about():
    """
    Neural Style Transfer with Keras 2.0.5.

    This function has no executable body; it exists only to carry this
    descriptive docstring.

    Based on:
    https://github.com/fchollet/keras/blob/master/examples/neural_style_transfer.py

    Contains a few improvements suggested in the paper "Improving the Neural
    Algorithm of Artistic Style" (http://arxiv.org/abs/1605.04603).
    """

In [None]:
# Flag handed to File_Helper below; presumably controls whether outputs are
# written to disk -- confirm against DLBasics_Utilities.
save_files = True

import os, sys, inspect
# Locate the directory holding this code, then put its parent on sys.path so
# that DLBasics_Utilities (imported next) can be found there.
current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
sys.path.insert(0, os.path.dirname(current_dir)) # path to parent dir
from DLBasics_Utilities import File_Helper
file_helper = File_Helper(save_files)

# NOTE(review): presumably creates the output directory when it is missing --
# confirm against File_Helper.check_for_directory.
file_helper.check_for_directory(file_helper.get_saved_output_dir())

# Constants
## Set up the transfer here, then reset and run the whole notebook

In [None]:
# CONSTANTS
#
# SET UP THE TRANSFER HERE
#
# Content image, and one path per style image.
base_image_path = 'input_data/waters-3038803_1280-crop.jpg'
style_image_paths = [ 'input_data/HR-Self-Portrait-1907-Picasso.jpg' ]

# NOTE: content_weight is only referenced by a commented-out line in the
# loss cell, so it currently has no effect on the result.
content_weight = 0.025
# One weight per style image (indexed in step with style_image_paths).
style_weights = [1]
# Size given to the first axis of the resized working image.
image_size = 400
# Multiplier applied to total_variation_loss.
total_variation_weight = 8.5e-5
# Number of outer optimisation rounds; each round calls L-BFGS with maxfun=20.
num_iter = 10

# 'vgg19' builds VGG19; any other value builds VGG16.
model_name = 'vgg16'
# Selects the multiplier inside content_loss: 1 or 2 pick a scaled form,
# anything else uses 1.
content_loss_type = 0
# True: saved images are resized back to the dimensions of the loaded image.
rescale_image = True
# Passed as the `interp` argument of imresize when saving.
rescale_method = 'bicubic'
# True: the second working dimension follows the input's aspect ratio;
# False: both working dimensions equal image_size.
maintain_aspect_ratio = True
# Output file names are this prefix followed by "at_iteration_<n>.png".
result_prefix = file_helper.get_saved_output_dir()+'/style-xfer-'

# Layer whose activations are sliced for the (currently disabled) content term.
content_layer = 'block1_conv2'
# Upper bound on how many feature layers take part in the style loss.
num_style_layers = 13

# 'content' starts from the content image, 'noise' from uniform random pixels.
init_image ='content'  # try 'noise'
# 'ave' selects average pooling; any other value selects max pooling.
pool_type_name = 'ave'
# True: the content image's CbCr channels are copied into each saved result.
preserve_color = False

# Mask-related settings; all disabled by default.
style_masks = None
content_mask = None
color_mask = None
mask_path = None
content_mask_path = None
style_masks_present = False
content_mask_present = False
color_mask_present = False
style_scale = 1.0
# Early-stopping threshold, in percent improvement between rounds.
min_improvement = 0

In [None]:
# Pooling selector read by pooling_func: 1 -> average pooling, 0 -> max pooling.
if pool_type_name == "ave":
    pooltype = 1
else:
    pooltype = 0

# Mode handed to preprocess_image when the content image is loaded.
read_mode = "color"

# Size bookkeeping for the generated picture; the real values are filled in
# by preprocess_image(..., load_dims=True).
img_width = 0
img_height = 0
img_WIDTH = 0
img_HEIGHT = 0
aspect_ratio = 0

In [None]:
# Shared state; get_input_tensor() assigns the real values.
nb_tensors = nb_style_images = combination_image = None

In [None]:
# Load an image file and turn it into a batch-of-one tensor for the network.
def preprocess_image(image_path, load_dims=False, read_mode="color"):
    """Read, resize and normalise an image.

    Args:
        image_path: Path of the image file to read.
        load_dims: When True, record this image's dimensions in the module
            globals (img_WIDTH, img_HEIGHT, aspect_ratio) and derive the
            working size (img_width, img_height) from image_size.
        read_mode: "color" reads RGB; any other value reads greyscale and
            replicates the single channel three times.

    Returns:
        A float32 array with a leading batch axis of length 1, in BGR channel
        order with fixed per-channel offsets subtracted.

    Note:
        Following this file's convention, the "width" names refer to array
        axis 0 and the "height" names to axis 1.
    """
    global img_width, img_height, img_WIDTH, img_HEIGHT, aspect_ratio

    pil_mode = "L" if read_mode != "color" else "RGB"
    # An explicit mode prevents crashes due to PNG images (ARGB).
    pixels = imread(image_path, mode=pil_mode)

    if pil_mode == "L":
        # Copy the single grey channel into each of three output channels.
        expanded = np.zeros(pixels.shape + (3,), dtype=np.uint8)
        for channel in range(3):
            expanded[:, :, channel] = pixels
        pixels = expanded

    if load_dims:
        img_WIDTH, img_HEIGHT = pixels.shape[0], pixels.shape[1]
        aspect_ratio = float(img_HEIGHT) / img_WIDTH

        img_width = image_size
        img_height = int(img_width * aspect_ratio) if maintain_aspect_ratio else image_size

    pixels = imresize(pixels, (img_width, img_height)).astype('float32')

    # Reverse the channel axis (RGB -> BGR), then subtract one offset per channel.
    pixels = pixels[:, :, ::-1]
    for channel, offset in enumerate((103.939, 116.779, 123.68)):
        pixels[:, :, channel] -= offset

    if K.image_dim_ordering() == "th":
        # Channels-first backends want (channels, axis0, axis1).
        pixels = pixels.transpose((2, 0, 1)).astype('float32')

    return np.expand_dims(pixels, axis=0)


# Undo preprocess_image: turn a flat/batched tensor back into a displayable image.
def deprocess_image(x):
    """Convert network-space pixel data into a uint8 RGB image.

    Args:
        x: Array holding img_width * img_height * 3 values; it is reshaped
            according to the backend's dimension ordering and the channel
            offsets are added to it in place.

    Returns:
        A uint8 array of shape (img_width, img_height, 3) in RGB order,
        clipped to the range 0..255.
    """
    channels_first = K.image_dim_ordering() == "th"
    if channels_first:
        x = x.reshape((3, img_width, img_height)).transpose((1, 2, 0))
    else:
        x = x.reshape((img_width, img_height, 3))

    # Add back the per-channel offsets removed during preprocessing.
    for channel, offset in enumerate((103.939, 116.779, 123.68)):
        x[:, :, channel] += offset

    # BGR -> RGB
    rgb = x[:, :, ::-1]

    return np.clip(rgb, 0, 255).astype('uint8')


# util function to preserve image color
def original_color_transform(content, generated, mask=None):
    """Give the generated image the colour (CbCr) channels of the content image.

    Args:
        content: Content image as a YCbCr array; only channels 1 and 2 are read.
        generated: Generated image as an RGB array.
        mask: Optional 2-D array. When given, colour is copied only at
            positions where the mask equals 1; when None, colour is copied
            everywhere.

    Returns:
        The generated image, converted back to RGB after the colour copy.
    """
    generated = fromimage(toimage(generated, mode='RGB'), mode='YCbCr')  # Convert to YCbCr color space

    if mask is None:
        generated[:, :, 1:] = content[:, :, 1:]  # Generated CbCr = Content CbCr
    else:
        width, height = generated.shape[:2]
        # One vectorised assignment replaces a per-pixel Python double loop.
        # Both arrays are cropped to the generated image's extent so that the
        # same positions are visited as in an explicit (i, j) loop.
        selected = np.asarray(mask)[:width, :height] == 1
        generated[selected, 1:] = content[:width, :height][selected, 1:]

    generated = fromimage(toimage(generated, mode='YCbCr'), mode='RGB')  # Convert to RGB color space
    return generated

In [None]:
def load_mask(mask_path, shape, return_mask_img=False):
    """Load a greyscale mask file and binarise it to values 0 and 1.

    Args:
        mask_path: Path of the mask image.
        shape: 4-tuple describing the target tensor. Its layout depends on the
            backend ordering: (_, channels, width, height) for "th", otherwise
            (_, width, height, channels).
        return_mask_img: When True, return the 2-D (width, height) mask
            directly; the channels entry of `shape` is then not used.

    Returns:
        Either the 2-D float32 mask, or an array of shape `shape[1:]` in which
        the mask is repeated for every channel.
    """
    channels_first = K.image_dim_ordering() == "th"
    if channels_first:
        _, channels, width, height = shape
    else:
        _, width, height, channels = shape

    mask = imread(mask_path, mode="L") # Grayscale mask load
    mask = imresize(mask, (width, height)).astype('float32')

    # Perform binarization of mask. The two tests must be complementary:
    # with "<= 127" and "> 128" a pixel of exactly 128 would be left at 128
    # and end up as roughly 0.5 after normalisation.
    mask[mask <= 127] = 0
    mask[mask > 127] = 255

    # Scale to 0..1. An all-black mask has a peak of 0; skip the division in
    # that case so the result stays all zeros instead of becoming NaN.
    peak = np.amax(mask)
    if peak > 0:
        mask /= peak

    if return_mask_img: return mask

    mask_tensor = np.empty(shape[1:])

    # Broadcast the 2-D mask across the channel axis.
    if channels_first:
        mask_tensor[...] = mask
    else:
        mask_tensor[...] = mask[:, :, np.newaxis]

    return mask_tensor

In [None]:
def pooling_func(x):
    """Apply a 2x2, stride-2 pooling layer to `x`.

    Average pooling is used when the module-level `pooltype` is 1; max
    pooling is used otherwise.
    """
    pool_layer = AveragePooling2D if pooltype == 1 else MaxPooling2D
    return pool_layer((2, 2), strides=(2, 2))(x)

In [None]:
def get_input_tensor():
    """Build the Keras input holding the content, style and generated images.

    The batch is ordered as: content image, each style image in the order of
    style_image_paths, then the placeholder for the generated image.

    Side effects:
        Sets the globals nb_tensors, nb_style_images and combination_image.
        Loading the content image with load_dims=True also sets the working
        image dimensions used for every later image.

    Returns:
        A Keras Input layer wrapping the concatenated batch tensor.
    """
    global nb_tensors, nb_style_images
    global combination_image

    # The content image must be loaded first: it fixes img_width / img_height.
    content_var = K.variable(preprocess_image(base_image_path, True, read_mode=read_mode))
    style_vars = [K.variable(preprocess_image(path)) for path in style_image_paths]

    # Placeholder that will receive the image being optimised.
    combination_image = K.placeholder((1, img_width, img_height, 3))

    stacked = [content_var] + style_vars + [combination_image]

    nb_tensors = len(stacked)
    nb_style_images = nb_tensors - 2  # content and generated image excluded

    # Join everything along the batch axis into a single tensor.
    batch = K.concatenate(stacked, axis=0)
    batch_shape = (nb_tensors, img_width, img_height, 3)

    return Input(tensor=batch, batch_shape=batch_shape)

In [None]:
def get_model_and_feature_layers():
    """Build the convolutional part of VGG16/VGG19 and load its weights.

    The network is VGG19 when model_name == "vgg19" and VGG16 for any other
    value. Every block is a run of 3x3 ReLU convolutions followed by
    pooling_func; VGG19 has a fourth convolution in blocks 3, 4 and 5.

    Returns:
        (model, feature_layers): the Keras Model and the list of all
        convolution layer names, in network order.
    """
    ip = get_input_tensor()
    use_vgg19 = model_name == "vgg19"

    # (filters, number of convolutions in VGG16) for blocks 1..5.
    block_specs = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    feature_layers = []
    x = ip
    for block_no, (filters, conv_count) in enumerate(block_specs, start=1):
        if use_vgg19 and conv_count == 3:
            conv_count = 4  # VGG19 adds a conv4 to the three-conv blocks
        for conv_no in range(1, conv_count + 1):
            layer_name = 'block%d_conv%d' % (block_no, conv_no)
            x = Convolution2D(filters, (3, 3), activation='relu', name=layer_name, padding='same')(x)
            feature_layers.append(layer_name)
        x = pooling_func(x)

    model = Model(ip, x)

    TF_16_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
    TF_19_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5'

    # Fetch (or reuse a cached copy of) the weight file for the chosen network.
    if use_vgg19:
        weights = get_file('vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5', TF_19_WEIGHTS_PATH_NO_TOP, cache_subdir='models')
    else:
        weights = get_file('vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5', TF_16_WEIGHTS_PATH_NO_TOP, cache_subdir='models')

    model.load_weights(weights)

    print('Model loaded.')
    return (model, feature_layers)

In [None]:
# compute the neural style loss
# first we need to define 4 util functions

# Improvement 1
# Gram matrix of a feature tensor, computed on activations shifted by -1.
def gram_matrix(x):
    """Return the Gram matrix of a rank-3 feature tensor.

    For non-"th" backends the channel axis is moved to the front first, then
    each channel is flattened to a row, 1 is subtracted from every
    activation, and the rows are multiplied with their own transpose.
    """
    assert K.ndim(x) == 3
    if K.image_dim_ordering() != "th":
        x = K.permute_dimensions(x, (2, 0, 1))
    shifted = K.batch_flatten(x) - 1
    return K.dot(shifted, K.transpose(shifted))


# Style loss: squared difference between the Gram matrices of the style
# features and of the generated image's features, so that the generated
# image picks up the style of the reference image.
def style_loss(style, combination, mask_path=None, nb_channels=None):
    """Compute the style loss for one layer and one style image.

    Args:
        style: Rank-3 feature tensor of the style image.
        combination: Rank-3 feature tensor of the generated image.
        mask_path: Optional path of a style mask file.
        nb_channels: Shape passed to load_mask as its `shape` argument; only
            used when a mask is loaded.

    The module-level content_mask_path, when set, masks the generated
    features; a style mask masks the style features, and also the generated
    features if no content mask is in use.

    Returns:
        A scalar tensor.
    """
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3

    has_content_mask = content_mask_path is not None

    if has_content_mask:
        content_mask_var = K.variable(load_mask(content_mask_path, nb_channels))
        combination = combination * K.stop_gradient(content_mask_var)

    if mask_path is not None:
        style_mask_var = K.variable(load_mask(mask_path, nb_channels))
        style = style * K.stop_gradient(style_mask_var)
        if not has_content_mask:
            combination = combination * K.stop_gradient(style_mask_var)

    gram_difference = gram_matrix(style) - gram_matrix(combination)
    channels = 3
    size = img_width * img_height
    normaliser = 4. * (channels ** 2) * (size ** 2)
    return K.sum(K.square(gram_difference)) / normaliser


# Content loss: keeps the generated image's features close to those of the
# base (content) image.
def content_loss(base, combination):
    """Return the scaled sum of squared differences between two feature tensors.

    The scale depends on the module-level content_loss_type:
        1 -> 1 / (2 * sqrt(channels) * sqrt(size))
        2 -> 1 / (channels * size)
        anything else -> 1
    where size is img_width * img_height.
    """
    channel_dim = -1
    if K.image_dim_ordering() == "th":
        channel_dim = 0

    try:
        channels = K.int_shape(base)[channel_dim]
    except TypeError:
        # Fall back to the symbolic shape when no static shape is available.
        channels = K.shape(base)[channel_dim]
    size = img_width * img_height

    if content_loss_type == 1:
        multiplier = 1. / (2. * (channels ** 0.5) * (size ** 0.5))
        return multiplier * K.sum(K.square(combination - base))
    if content_loss_type == 2:
        multiplier = 1. / (channels * size)
        return multiplier * K.sum(K.square(combination - base))
    return 1. * K.sum(K.square(combination - base))


# Total variation loss: penalises differences between neighbouring pixels so
# that the generated image stays locally coherent.
def total_variation_loss(x):
    """Return the total variation loss of a rank-4 image tensor.

    Squared differences to the next pixel along axis 1 and along axis 2 are
    added, raised to the power 1.25, and summed.
    """
    assert K.ndim(x) == 4
    last_row = img_width - 1
    last_col = img_height - 1

    core = x[:, :last_row, :last_col, :]
    shifted_axis1 = x[:, 1:, :last_col, :]
    shifted_axis2 = x[:, :last_row, 1:, :]

    squared_axis1 = K.square(core - shifted_axis1)
    squared_axis2 = K.square(core - shifted_axis2)
    return K.sum(K.pow(squared_axis1 + squared_axis2, 1.25))

In [None]:
def eval_loss_and_grads(x):
    """Evaluate the loss and its gradient for a candidate image.

    Args:
        x: Array with img_width * img_height * 3 values.

    Returns:
        (loss_value, grad_values), where grad_values is a flat float64 array.
    """
    results = f_outputs([x.reshape((1, img_width, img_height, 3))])

    gradient_parts = results[1:]
    if len(gradient_parts) == 1:
        gradient = gradient_parts[0]
    else:
        gradient = np.array(gradient_parts)

    return results[0], gradient.flatten().astype('float64')

In [None]:
# this Evaluator class makes it possible
# to compute loss and gradients in one pass
# while retrieving them via two separate functions,
# "loss" and "grads". This is done because scipy.optimize
# requires separate functions for loss and gradients,
# but computing them separately would be inefficient.
class Evaluator(object):
    """Cache one loss/gradient evaluation between paired loss() and grads() calls.

    Calls must strictly alternate: loss(x) evaluates both values and stores
    them; the following grads(x) hands back the stored gradient and clears
    the cache.
    """

    def __init__(self):
        self.loss_value = None
        # Must be spelt exactly like the attribute the methods use; a
        # differently named attribute would leave this one undefined until
        # loss() has run.
        self.grad_values = None

    def loss(self, x):
        """Evaluate loss and gradient at x, cache both, and return the loss."""
        assert self.loss_value is None
        loss_value, grad_values = eval_loss_and_grads(x)
        self.loss_value = loss_value
        self.grad_values = grad_values
        return self.loss_value

    def grads(self, x):
        """Return a copy of the cached gradient and reset the cache.

        `x` is not used; the gradient comes from the preceding loss() call.
        """
        assert self.loss_value is not None
        grad_values = np.copy(self.grad_values)
        self.loss_value = None
        self.grad_values = None
        return grad_values

In [None]:
# Build the network (this also loads the input images and the pretrained
# weights) and get the names of its convolution layers.
model, feature_layers = get_model_and_feature_layers()

In [None]:
# Look-up tables keyed by layer name: each layer's symbolic output tensor and
# its output shape (layers were given unique names).
outputs_dict = {layer.name: layer.output for layer in model.layers}
shape_dict = {layer.name: layer.output_shape for layer in model.layers}

In [None]:
# Shared loss/gradient cache whose loss and grads methods are handed to the optimiser.
evaluator = Evaluator()

In [None]:
# combine the loss functions into a single scalar
loss = K.variable(0.)
layer_features = outputs_dict[content_layer]
# Batch index 0 is the content image and index nb_tensors - 1 is the
# generated image (the order fixed in get_input_tensor).
base_image_features = layer_features[0, :, :, :]
combination_features = layer_features[nb_tensors - 1, :, :, :]
# NOTE: the content term below is commented out, so content_weight and
# content_loss do not contribute to `loss`.
#loss += content_weight * content_loss(base_image_features, combination_features)

In [None]:
# Improvement 2
# Use all layers for style feature extraction and reconstruction
nb_layers = len(feature_layers) - 1

# One mask entry per style image: the supplied masks when present, else None.
# NOTE(review): mask_paths is not defined anywhere in the visible notebook,
# so style_masks_present = True would raise a NameError here -- confirm
# where it is meant to come from.
style_masks = mask_paths if style_masks_present else [None] * nb_style_images

# Position of the channel axis for the active backend ordering.
channel_index = -1 if K.image_dim_ordering() != "th" else 1

In [None]:
# Improvement 3 : Chained Inference without blurring
# For each pair of consecutive feature layers (i, i + 1), the style loss of
# layer i + 1 is subtracted from the style loss of layer i, and the weighted
# difference is added to the total loss.
#AG print("len feature_layers = ",len(feature_layers))
#print("summing up this many style layers: ",(min(args.num_style_layers, len(feature_layers)-1)))
#print("nb_style_image = ",nb_style_images)
# Never use more layers than the model provides.
layers_to_use  = min(num_style_layers, len(feature_layers))
# Each pass pairs layer i with layer i + 1, hence layers_to_use - 1 passes.
for i in range(layers_to_use-1):
    # Style losses at layer i, one per style image.
    layer_features = outputs_dict[feature_layers[i]]
    shape = shape_dict[feature_layers[i]]
    # The last batch slot is the generated image; slots 1 .. nb_tensors - 2
    # are the style images.
    combination_features = layer_features[nb_tensors - 1, :, :, :]
    style_reference_features = layer_features[1:nb_tensors - 1, :, :, :]
    sl1 = []
    for j in range(nb_style_images):
        # Masks are bypassed here: None is passed instead of style_masks[j].
        #sl1.append(style_loss(style_reference_features[j], combination_features, style_masks[j], shape))
        sl1.append(style_loss(style_reference_features[j], combination_features, None, shape))  # AG

    # Style losses at layer i + 1, computed the same way.
    layer_features = outputs_dict[feature_layers[i + 1]]
    shape = shape_dict[feature_layers[i + 1]]
    combination_features = layer_features[nb_tensors - 1, :, :, :]
    style_reference_features = layer_features[1:nb_tensors - 1, :, :, :]
    sl2 = []
    for j in range(nb_style_images):
        # sl2.append(style_loss(style_reference_features[j], combination_features, style_masks[j], shape))
        sl2.append(style_loss(style_reference_features[j], combination_features, None, shape)) # AG

    for j in range(nb_style_images):
        # Chained term: loss at layer i minus loss at layer i + 1.
        sl = sl1[j] - sl2[j]

        # Improvement 4
        # Geometric weighted scaling of style loss
        # The divisor halves with every step of i, so later layer pairs get
        # larger weights than earlier ones.
        loss += (style_weights[j] / (2 ** (layers_to_use- (i + 1)))) * sl

In [None]:
# Add the smoothness (total variation) term to the accumulated loss.
loss += total_variation_weight * total_variation_loss(combination_image)

# get the gradients of the generated image wrt the loss
grads = K.gradients(loss, combination_image)

# The compiled function returns the loss first, followed by the gradient(s).
outputs = [loss]
# isinstance also accepts list/tuple subclasses, which an exact type()
# comparison would wrongly treat as a single tensor.
if isinstance(grads, (list, tuple)):
    outputs += grads
else:
    outputs.append(grads)

# Callable taking the generated image and returning the entries of `outputs`.
f_outputs = K.function([combination_image], outputs)

In [None]:
# run scipy-based optimization (L-BFGS) over the pixels of the generated image
# so as to minimize the neural style loss

# Starting point of the optimisation.
if init_image == 'content':
    x = preprocess_image(base_image_path, True, read_mode=read_mode)
elif init_image == 'noise':
    x = np.random.uniform(0, 255, (1, img_width, img_height, 3)) - 128.
else:
    # Fail here with a clear message; merely printing would leave `x`
    # undefined and surface later as a NameError in the optimisation loop.
    raise ValueError("Hey! Don't know init_image = %r" % (init_image,))

# Always defined so that the later "content is not None" check is valid even
# when colour preservation is off.
content = None

# We require original image if we are to preserve color in YCbCr mode
if preserve_color:
    content = imread(base_image_path, mode="YCbCr")
    content = imresize(content, (img_width, img_height))

    if color_mask_present:
        color_mask_shape = (None, img_width, img_height, None)
        # NOTE(review): color_mask is passed as the mask *path* here, but it
        # is initialised to None in the constants -- confirm which variable
        # is meant to hold the colour mask file path.
        color_mask = load_mask(color_mask, color_mask_shape, return_mask_img=True)
    else:
        color_mask = None
else:
    color_mask = None

# Sentinel meaning "no previous loss value yet".
prev_min_val = -1

# Early-stopping threshold, in percent.
improvement_threshold = float(min_improvement)



In [None]:
# Outer optimisation loop: each round runs L-BFGS for at most 20 function
# evaluations, then saves the current image.
for i in range(num_iter):
    print("Starting iteration %d of %d" % ((i + 1), num_iter))
    start_time = time.time()

    x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(), fprime=evaluator.grads, maxfun=20)

    # First round: no previous value yet, so the improvement comes out as 0.
    if prev_min_val == -1:
        prev_min_val = min_val

    # Percentage decrease of the loss relative to the previous round.
    improvement = (prev_min_val - min_val) / prev_min_val * 100

    print("Current loss value:", min_val, " Improvement : %0.3f" % improvement, "%")
    prev_min_val = min_val
    # save current generated image
    # (deprocess_image modifies its argument in place, hence the copy)
    img = deprocess_image(x.copy())

    if preserve_color and content is not None:
        img = original_color_transform(content, img, mask=color_mask)

    if rescale_image:
        # Back to the dimensions recorded when the content image was loaded.
        print("Rescaling Image to (%d, %d)" % (img_WIDTH, img_HEIGHT))
        img = imresize(img, (img_WIDTH, img_HEIGHT), interp=rescale_method)
    else:
        img_ht = int(img_width * aspect_ratio)
        print("Rescaling Image to (%d, %d)" % (img_width, img_ht))
        img = imresize(img, (img_width, img_ht), interp=rescale_method)

    fname = result_prefix + "at_iteration_%d.png" % (i + 1)
    imsave(fname, img)
    end_time = time.time()
    print("Image saved as", fname)
    print("Iteration %d completed in %ds" % (i + 1, end_time - start_time))

    # Compare by value: "is not 0.0" tests object identity and is always
    # true for a computed float, so a threshold of 0 would not disable
    # early stopping.
    if improvement_threshold != 0.0:
        if improvement < improvement_threshold and improvement != 0.0:
            print("Improvement (%f) is less than improvement threshold (%f). Early stopping script." %
                  (improvement, improvement_threshold))
            exit()
