# AI Art with Style Transfer

First, start out with some imports.

In [None]:
import time
import numpy as np
import pandas as pd
from PIL import Image
from keras import backend
from keras.preprocessing.image import load_img, img_to_array
from keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.layers import Input
# SciPy's L-BFGS-B minimizer is named fmin_l_bfgs_b (no underscore after the leading "f")
from scipy.optimize import fmin_l_bfgs_b

Specify the paths for the content image, the style image, and the output image.

In [None]:
# Paths of the content image (c), the style image (s), and the output image (o).
# All three start out as empty strings.
c_image_path, s_image_path, o_image_path = '', '', ''

Image processing

In [None]:
# Dimensions every image is resized to before being handed to the network
target_height = 512
target_width = 512
# load_img takes target_size as (height, width)
target_size = (target_height, target_width)

# Get size of the original image
c_image_original = Image.open(c_image_path)
c_image_original_size = c_image_original.size
# Loads content image to PIL format
c_image = load_img(path=c_image_path, target_size=target_size)
# Turns image into extended numpy array
c_image_arr = img_to_array(c_image)
# Expand the first dimension, preprocess input for given model, and add array to the backend
# (The extra dimension is added because keras expects an array of samples; thus, the image must be
# part of an array)
c_image_arr = backend.variable(preprocess_input(np.expand_dims(c_image_arr, axis=0)), dtype='float32', name='c_image_arr')

# Perform same operations on style image...
s_image = load_img(path=s_image_path, target_size=target_size)
s_image_arr = img_to_array(s_image)
s_image_arr = backend.variable(preprocess_input(np.expand_dims(s_image_arr, axis=0)), dtype='float32')

# Initialize the output image as numpy array of shape (target_height, target_width, 3) with random RGB values.
# The (height, width) order matches target_size above, so the output lines up with the content and
# style arrays even when the target is not square.
o_image_initial = np.random.randint(256, size=(target_height, target_width, 3)).astype('float64')
# Add the leading sample dimension and apply the same model preprocessing as the other two images
o_image_initial = preprocess_input(np.expand_dims(o_image_initial, axis=0))
# Instantiates a placeholder tensor with the same shape as the initial output image
o_image_placeholder = backend.placeholder(shape=(1, target_height, target_width, 3))

## Content Loss
The content loss function must be formulated in order to ensure that the generated image x retains some of the "global" characteristics of the content image p. To achieve this, the content loss function is defined as half the sum of squared differences between the feature representations of p and x, respectively, at a given layer l:

$$ L_c(p,x,l) = \frac{1}{2}\sum_{i,j}^{} (F^l_{i,j} - P^l_{i,j})^2 $$

<ul>
    <li><i>F</i> and <i>P</i> are matrices of size <i>N</i> x <i>M</i></li>
    <li><i>N</i> is the number of filters in layer <i>l</i> and <i>M</i> is the number of spatial elements in the feature map (height times width) for layer <i>l</i></li>
    <li><i>F</i> contains the feature representation of <i>x</i> for layer <i>l</i></li>
    <li><i>P</i> contains the feature representation of <i>p</i> for layer <i>l</i></li>
</ul>

In [None]:
def get_feature_reps(x, layer_names, model):
    """
    Get feature representations for one or more layers in a given model.

    Args:
        x: Not read by this function; the representations are built from the
            output tensors of the model's layers. Kept for interface
            compatibility.
        layer_names: Iterable of layer names, each passed to ``model.get_layer``.
        model: Object exposing ``get_layer(name)`` whose layers have an
            ``output`` tensor.

    Returns:
        A list with one feature matrix per entry of ``layer_names``, in the
        same order. Each matrix has shape (N_l, M_l), where N_l is the last
        dimension of the layer output and M_l is the product of its second
        and third dimensions. The list is empty when ``layer_names`` is empty.
    """
    feature_matrices = []
    for layer_name in layer_names:
        current_layer = model.get_layer(layer_name)
        feature_raw = current_layer.output
        # tf_session is looked up as a module-level name; it is not defined in this function.
        # NOTE(review): confirm it is created before this function is called.
        feature_raw_shape = backend.shape(feature_raw).eval(session=tf_session)
        # Last axis is taken as the number of filters, axes 1 and 2 as the spatial extent
        N_l = feature_raw_shape[-1]
        M_l = feature_raw_shape[1] * feature_raw_shape[2]
        # Flatten to (M_l, N_l); this only has the right element count when the
        # leading dimension is 1 -- presumably a single-image batch, verify against caller
        feature_matrix = backend.reshape(feature_raw, (M_l, N_l))
        # Transpose so that each row holds one filter: (N_l, M_l)
        feature_matrix = backend.transpose(feature_matrix)
        feature_matrices.append(feature_matrix)
    return feature_matrices

In [None]:
def get_content_loss(F, P):
    """
    Return the content loss between two feature representations.

    The loss is half the sum of the squared element-wise differences
    between F and P.
    """
    squared_diff = backend.square(F - P)
    return 0.5 * backend.sum(squared_diff)

## Style Loss
Conversely, style loss is designed to preserve the stylistic characteristics of the style image, <i>a</i>. As opposed to using the difference between feature representations, use the difference between Gram matrices from selected layers. The Gram matrix is a square matrix that contains the dot products between each vectorized filter in layer <i>l</i>, and it can therefore be thought of as a non-normalized correlation matrix for filters in the layer. The Gram matrix is defined as follows:
    
$$ G^l = F^l(F^l)^T $$

In [1]:
def get_gram_matrix(F):
    """
    Return the Gram matrix of F, i.e. the product of F with its own transpose.

    Args:
        F: Feature matrix; the product F * F^T is taken with the Keras backend.

    Returns:
        The backend tensor ``dot(F, transpose(F))``.
    """
    # The Keras backend is imported under the name `backend` in this file
    G = backend.dot(F, backend.transpose(F))
    return G

Then, the loss contribution from layer <i>l</i> is

$$ E_l = \frac{1}{4N^2_lM^2_l} \sum_{i,j}(G^l_{ij}-A^l_{ij})^2$$

where <i>A</i> is the Gram matrix for the style image <i>a</i> and <i>G</i> is the Gram matrix for the generated image <i>x</i>. Ascending layers in most convolutional networks such as VGG have increasingly larger receptive fields. As this receptive field grows, more large-scale characteristics of the input image are preserved. Because of this, multiple layers should be selected for “style” to incorporate both local and global stylistic qualities. To create a smooth blending between these different layers, we can assign a weight <i>w</i> to each layer, and define the total style loss as:

$$ L_s(a,x,l) = \sum_l^L w_lE_l $$

In [None]:
def get_style_loss(ws, Gs, As):
    """
    Return the weighted sum of per-layer style losses.

    ws, Gs and As are iterated in lockstep. Each entry of Gs and As is a
    feature matrix (its Gram matrix is computed here via get_gram_matrix),
    and each entry of ws is the weight applied to that layer's term.
    """
    total_loss = backend.variable(0.)
    for weight, gen_features, style_features in zip(ws, Gs, As):
        # First dimension of the feature matrix is used as N_l, second as M_l
        feature_shape = backend.int_shape(gen_features)
        n_filters, n_positions = feature_shape[0], feature_shape[1]
        gram_diff = get_gram_matrix(gen_features) - get_gram_matrix(style_features)
        squared_error = backend.sum(backend.square(gram_diff))
        total_loss += weight * 1/4 * squared_error / (n_filters**2 * n_positions**2)
    return total_loss