# Table of Contents
 <p><div class="lev4 toc-item"><a href="#Some-Tests-for-backend-functions" data-toc-modified-id="Some-Tests-for-backend-functions-0001"><span class="toc-item-num">0.0.0.1&nbsp;&nbsp;</span>Some Tests for backend functions</a></div><div class="lev4 toc-item"><a href="#kears.backend.permute_dimensions" data-toc-modified-id="kears.backend.permute_dimensions-0002"><span class="toc-item-num">0.0.0.2&nbsp;&nbsp;</span>kears.backend.permute_dimensions</a></div><div class="lev3 toc-item"><a href="#Continue-our-example" data-toc-modified-id="Continue-our-example-001"><span class="toc-item-num">0.0.1&nbsp;&nbsp;</span>Continue our example</a></div><div class="lev3 toc-item"><a href="#Define-the-Final-Loss" data-toc-modified-id="Define-the-Final-Loss-002"><span class="toc-item-num">0.0.2&nbsp;&nbsp;</span>Define the Final Loss</a></div><div class="lev4 toc-item"><a href="#Setting-up-the-gradient-descent-process" data-toc-modified-id="Setting-up-the-gradient-descent-process-0021"><span class="toc-item-num">0.0.2.1&nbsp;&nbsp;</span>Setting up the gradient-descent process</a></div><div class="lev4 toc-item"><a href="#Loop" data-toc-modified-id="Loop-0022"><span class="toc-item-num">0.0.2.2&nbsp;&nbsp;</span>Loop</a></div>

In [11]:
from keras.preprocessing.image import load_img, img_to_array

In [12]:
import tensorflow as tf
tf.compat.v1.disable_eager_execution()

In [13]:
import os

# Directory that both input images are read from.
input_path = 'nueral_transfer_images'

# target_image_path: the content image that gets restyled.
# style_reference_image_path: the image whose style is transferred.
target_image_path, style_reference_image_path = (
    os.path.join(input_path, file_name)
    for file_name in ('portrait.jpg', 'transfer_style_reference.jpg')
)

In [14]:
# Original size of the target image; only used to derive img_width so the aspect ratio is kept.
width, height = load_img(target_image_path).size

In [15]:
# Fixed height (in pixels) that every image is resized to when it is loaded.
img_height = 400

In [16]:
# Width that keeps the target image's aspect ratio at the fixed height (truncated to an int).
img_width = int(width * img_height / height)

In [17]:
import numpy as np
from keras.applications import vgg19
# Check out https://stackoverflow.com/questions/47555829/preprocess-input-method-in-keras
def preprocess_image(image_path):
    """Load an image from disk and turn it into a VGG19-ready batch of one.

    Parameters
    ----------
    image_path : str
        Path of the image file to load.

    Returns
    -------
    Array of shape (1, img_height, img_width, 3) after
    ``vgg19.preprocess_input`` has been applied.
    """
    # load_img resizes to (img_height, img_width): 400 rows by the
    # aspect-ratio-scaled width.
    pil_image = load_img(image_path, target_size=(img_height, img_width))

    # PIL image -> (img_height, img_width, 3) array, then prepend a batch
    # axis of size 1 -> (1, img_height, img_width, 3).
    batch = np.expand_dims(img_to_array(pil_image), axis=0)

    # preprocess_input zero-centers each channel with the ImageNet means
    # (the network was trained on inputs normalized this way); the shape is
    # unchanged.
    return vgg19.preprocess_input(batch)

# VGG networks are trained on BGR images whose channels were zero-centered by subtracting the mean [103.939, 116.779, 123.68], so these means are added back and the channels are flipped to RGB.
# Furthermore, since the optimized image may take values anywhere between −∞ and ∞, we must clip it so that the values stay within the 0-255 range.
def deprocess_image(x):
    """Turn a VGG-preprocessed image back into a displayable uint8 image.

    Adds the per-channel means 103.939, 116.779 and 123.68 back to channels
    0, 1 and 2, reverses the channel axis (BGR -> RGB), clips to [0, 255] and
    casts to uint8.

    The input is never modified: the work is done on a copy, so callers no
    longer need to pass ``x.copy()`` to protect their array.

    Parameters
    ----------
    x : array-like, shape (..., 3)
        Image with the channels on the last axis, typically
        (height, width, 3). Integer input is promoted to float32 first,
        because the means are fractional.

    Returns
    -------
    numpy.ndarray
        uint8 array with the same shape as ``x``.
    """
    # Work on a private copy so the in-place additions below cannot leak
    # into the caller's array.
    x = np.array(x, copy=True)
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype('float32')

    # `...` indexes the last (channel) axis regardless of leading dimensions.
    x[..., 0] += 103.939
    x[..., 1] += 116.779
    x[..., 2] += 123.68
    x = x[..., ::-1]  # reverse the channel order
    return np.clip(x, 0, 255).astype('uint8')

In [19]:
from keras import backend as K

# The target (content) image and the style reference image never change,
# so they enter the graph as constants.
target_image = K.constant(preprocess_image(target_image_path))
style_reference_image = K.constant(preprocess_image(style_reference_image_path))

# The combination image is the quantity being optimized: it is fed in anew
# at every evaluation, hence a placeholder of shape (1, img_height, img_width, 3).
combination_image = K.placeholder((1, 
                                   img_height, 
                                   img_width, 3))

# Stack the three images along the batch axis. The order matters later:
# index 0 = target, index 1 = style reference, index 2 = combination.
input_tensor = K.concatenate([target_image, 
                              style_reference_image, 
                              combination_image], axis=0)

# VGG19 with ImageNet weights and without the classifier head
# (include_top=False), built directly on the 3-image batch.
model = vgg19.VGG19(input_tensor=input_tensor,
                    weights='imagenet',
                    include_top=False)

$$J_{content}(C,G) =  \frac{1}{4 \times n_H \times n_W \times n_C}\sum _{ \text{all entries}} (a^{(C)} - a^{(G)})^2\tag{1} $$

In [21]:
def content_loss(base, combination):
    """Sum of squared element-wise differences between two activation tensors.

    Note: the 1 / (4 * n_H * n_W * n_C) factor of Eq. (1) is NOT applied
    here; the value returned is the raw sum, so any constant scaling is left
    to the weight the caller multiplies this loss by.

    Parameters
    ----------
    base : tensor
        Activations of the target (content) image.
    combination : tensor
        Activations of the generated image, same shape as ``base``.

    Returns
    -------
    Scalar tensor.
    """
    difference = combination - base
    squared = K.square(difference)  # element-wise square
    return K.sum(squared)           # reduce over every entry

#### Some Tests for backend functions

##### keras.backend.batch_flatten()

In [55]:
# K.batch_flatten keeps the first (batch) dimension and flattens all the
# others: (2, 2, 2) -> (2, 4).
# K.flatten collapses everything into a single 1-D tensor: (2, 2, 2) -> (8,).

a = tf.constant([[[1.0, 1.5], [2.0, 2.5]], [[3.0, 3.5], [4.0, 4.5]]])
print('input', a, '\n')
b = K.batch_flatten(a)
print('batch_flatten', b, '\n')
print('flatten', K.flatten(a))

input Tensor("Const_34:0", shape=(2, 2, 2), dtype=float32) 

batch_flatten Tensor("Reshape_20:0", shape=(2, 4), dtype=float32) 

flatten Tensor("Reshape_21:0", shape=(8,), dtype=float32)


#### kears.backend.permute_dimensions

In [9]:
import tensorflow as tf
# NOTE(review): this demo calls x.numpy(), which needs eager execution, while
# an earlier cell calls tf.compat.v1.disable_eager_execution(). It presumably
# was run in a fresh kernel with eager mode on (hence the line below is
# commented out) — confirm before relying on Restart & Run All.
#tf.compat.v1.disable_eager_execution()

import numpy as np
import keras.backend as K

# 2 x 3 x 4 array holding the integers 1..24.
a = np.arange(1, 25).reshape((2, 3, 4))
print("a shape:", a.shape)
print(a)
print('===================')
b = K.constant(a)
# Swap axes 0 and 1: shape (2, 3, 4) -> (3, 2, 4).
x = K.permute_dimensions(b, (1, 0, 2))


print("x shape", x.numpy().shape)
print(x.numpy())

#print(x1.numpy())


# In TF1-style graph mode (no eager execution) evaluate the tensor in a
# session instead of calling .numpy():
#with K.get_session() as ss:
#    print(ss.run(x))
#     print('-----')
#     print('-----')


a shape: (2, 3, 4)
[[[ 1  2  3  4]
  [ 5  6  7  8]
  [ 9 10 11 12]]

 [[13 14 15 16]
  [17 18 19 20]
  [21 22 23 24]]]
x shape (3, 2, 4)
[[[ 1.  2.  3.  4.]
  [13. 14. 15. 16.]]

 [[ 5.  6.  7.  8.]
  [17. 18. 19. 20.]]

 [[ 9. 10. 11. 12.]
  [21. 22. 23. 24.]]]


array([[[ 1.,  5.,  9.],
        [13., 17., 21.]],

       [[ 2.,  6., 10.],
        [14., 18., 22.]],

       [[ 3.,  7., 11.],
        [15., 19., 23.]],

       [[ 4.,  8., 12.],
        [16., 20., 24.]]], dtype=float32)

### Continue our example

<img src="images/NST_GM.png" style="width:900px;height:300px;">


In [22]:
def gram_matrix(x):
    """Gram matrix of a feature map: inner products between its channels.

    Parameters
    ----------
    x : tensor
        Rank-3 feature map; assumed to be (height, width, channels), as
        produced by slicing one image out of a layer's output — confirm
        against the caller.

    Returns
    -------
    Tensor of shape (channels, channels) under that assumption.
    """
    # Move axis 2 to the front: (d0, d1, d2) -> (d2, d0, d1).
    channels_first = K.permute_dimensions(x, (2, 0, 1))
    # Keep the first axis, flatten the rest: one row per channel.
    flat_features = K.batch_flatten(channels_first)
    # Row-by-row inner products.
    return K.dot(flat_features, K.transpose(flat_features))

$$J_{style}^{[l]}(S,G) = \frac{1}{4 \times {n_C}^2 \times (n_H \times n_W)^2} \sum _{i=1}^{n_C}\sum_{j=1}^{n_C}(G^{(S)}_{ij} - G^{(G)}_{ij})^2\tag{2} $$

In [62]:
def style_loss(style, combination):
    """Scaled squared distance between the Gram matrices of two feature maps.

    NOTE(review): the normalizer uses a fixed ``channels = 3`` and the input
    image size (img_height * img_width), not the n_C, n_H, n_W of the layer
    the features come from as written in Eq. (2). This only rescales each
    layer's contribution; kept as is to preserve the existing loss values.

    Parameters
    ----------
    style : tensor
        Feature map of the style reference image for one layer.
    combination : tensor
        Feature map of the generated image for the same layer.

    Returns
    -------
    Scalar tensor.
    """
    gram_difference = gram_matrix(style) - gram_matrix(combination)
    channels = 3
    size = img_height * img_width
    normalizer = 4. * (channels ** 2) * (size ** 2)
    return K.sum(K.square(gram_difference)) / normalizer

To these two loss components, you add a third: the total variation loss, which operates on the pixels of the generated combination image. It encourages spatial continuity in the generated image, thus avoiding overly pixelated results. You can interpret it as a regularization loss.

In [24]:
def total_variation_loss(x):
    """Total variation regularizer for a batch of images.

    For every pixel (except the last row and column) the squared difference
    to its neighbour one step along axis 1 and one step along axis 2 is
    added, raised to the power 1.25 and summed over all entries.

    Parameters
    ----------
    x : tensor
        Rank-4 tensor indexed as (batch, img_height, img_width, channels).

    Returns
    -------
    Scalar tensor.
    """
    anchor = x[:, :img_height - 1, :img_width - 1, :]
    next_along_height = x[:, 1:, :img_width - 1, :]
    next_along_width = x[:, :img_height - 1, 1:, :]

    height_sq = K.square(anchor - next_along_height)
    width_sq = K.square(anchor - next_along_width)
    return K.sum(K.pow(height_sq + width_sq, 1.25))


### Define the Final Loss

In [38]:
model.summary()

Model: "vgg19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(3, 400, 485, 3)]        0         
_________________________________________________________________
block1_conv1 (Conv2D)        (3, 400, 485, 64)         1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (3, 400, 485, 64)         36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (3, 200, 242, 64)         0         
_________________________________________________________________
block2_conv1 (Conv2D)        (3, 200, 242, 128)        73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (3, 200, 242, 128)        147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (3, 100, 121, 128)        0     

In [26]:
# Map every layer name to its symbolic output tensor so activations can be
# looked up by name.
outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])
# Layer whose activations define the content loss.
content_layer = 'block5_conv2'
# Layers whose activations define the style loss (one per convolutional block).
style_layers = ['block1_conv1',
                'block2_conv1',
                'block3_conv1',
                'block4_conv1',
                'block5_conv1']
# Relative weights of the three loss components.
total_variation_weight = 1e-4
style_weight = 1.
content_weight = 0.025
# Running total; the three components are added to it below.
loss = K.variable(0.)
layer_features = outputs_dict[content_layer]
# The batch is (target, style reference, combination), so index 0 holds the
# target activations and index 2 the combination activations.
target_image_features = layer_features[0, :, :, :] # original note: block5_conv2 output has shape (3, 25, 30, 512)
combination_features = layer_features[2, :, :, :]

# Content loss: target vs. combination at the content layer.
loss = loss + content_weight * content_loss(target_image_features, combination_features)

# Style loss: style reference vs. combination, weighted equally across the style layers.
for layer_name in style_layers:
    layer_features = outputs_dict[layer_name]
    style_reference_features = layer_features[1, :, :, :] # index 1 is the style reference activation
    combination_features = layer_features[2, :, :, :]
    sl = style_loss(style_reference_features, combination_features)
    loss += (style_weight / len(style_layers)) * sl

# Total variation loss on the combination image itself, acting as a regularizer.
loss += total_variation_weight * total_variation_loss(combination_image)


In [40]:
loss

<tf.Tensor 'add_7:0' shape=() dtype=float32>

#### Setting up the gradient-descent process

In [16]:
# import tensorflow as tf
# tf.compat.v1.disable_eager_execution()

In [18]:
# K.gradients returns a list; [0] picks the gradient of the loss with respect
# to combination_image (the placeholder being optimized).
grads = K.gradients(loss, combination_image)[0]

# Callable that takes [combination_image value] and returns [loss, grads]:
# the first argument lists the inputs, the second the outputs to fetch.
fetch_loss_and_grads = K.function([combination_image], [loss, grads])
# A single call therefore yields both the loss and its gradient for a given
# combination image.

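# fmin_l_bfgs_b expects the loss and the gradient as two separate functions, but both come out of a single
# fetch_loss_and_grads call; the class below computes them together in loss() and hands back the cached gradient in grads().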
class Evaluator(object):
    """Adapter exposing loss and gradient as the two callables L-BFGS expects.

    ``fetch_loss_and_grads`` computes the loss and the gradient in one pass.
    ``loss(x)`` runs that pass, returns the loss and caches the gradient;
    the following ``grads(x)`` call returns the cached gradient and clears
    the cache. The two methods must therefore be called alternately, loss
    first — the assertions enforce this.

    Attributes
    ----------
    loss_value : loss from the last ``loss`` call, or None when nothing is cached.
    grad_values : flattened float64 gradient from the last ``loss`` call, or None.
    """

    def __init__(self):
        self.loss_value = None
        # Fixed: this was initialised as ``grads_values``, a name no other
        # method uses, so ``grad_values`` did not exist until loss() ran.
        self.grad_values = None

    def loss(self, x):
        """Evaluate the loss at flat vector ``x`` and cache the gradient."""
        assert self.loss_value is None
        # The optimizer works on a flat vector; restore the image shape.
        x = x.reshape((1, img_height, img_width, 3))
        outs = fetch_loss_and_grads([x])
        self.loss_value = outs[0]
        # Flatten and cast to float64 before handing the gradient to the optimizer.
        self.grad_values = outs[1].flatten().astype('float64')
        return self.loss_value

    def grads(self, x):
        """Return the gradient cached by the preceding ``loss`` call.

        ``x`` is ignored; it is only present because the optimizer passes it.
        """
        assert self.loss_value is not None
        grad_values = np.copy(self.grad_values)
        # Reset so the next loss() call starts from a clean state.
        self.loss_value = None
        self.grad_values = None
        return grad_values
    
evaluator = Evaluator()

#### Loop

In [42]:
from scipy.optimize import fmin_l_bfgs_b
from keras.preprocessing.image import save_img
#from scipy.misc import imsave # bug
import time


In [21]:
# Prefix of the image file written after each iteration.
result_prefix = 'my_result'
# Number of outer optimization rounds (each one is a separate fmin_l_bfgs_b call).
iterations = 20

# Initial state of the optimization: the preprocessed target (portrait) image.
# preprocess_image returns a float array: the image is converted from RGB to
# BGR, then each color channel is zero-centered with respect to the ImageNet
# dataset, without scaling.

x = preprocess_image(target_image_path) 
print(x.shape)
# fmin_l_bfgs_b works on flat vectors, so flatten the image.
x = x.flatten()
print(x.shape)

In [None]:
# https://github.com/scipy/scipy/blob/v1.7.1/scipy/optimize/lbfgsb.py#L48-L207
# fmin_l_bfgs_b():
#     func : callable f(x,*args)
#         Function to minimize.
#     x0 : ndarray
#         Initial guess.
#     fprime : callable fprime(x,*args), optional
#         The gradient of `func`. If None, then `func` returns the function
#         value and the gradient (``f, g = func(x, *args)``), unless
#         `approx_grad` is True in which case `func` returns only ``f``.
#    maxfun : int, optional
#        Maximum number of function evaluations.
# ...
#    Returns
#     -------
#     x : array_like
#         Estimated position of the minimum.
#     f : float
#         Value of `func` at the minimum.
#     d : dict
#         Information dictionary.
#         * d['warnflag'] is
#           - 0 if converged,
#           - 1 if too many function evaluations or too many iterations,
#           - 2 if stopped for another reason, given in d['task']
#         * d['grad'] is the gradient at the minimum (should be 0 ish)
#         * d['funcalls'] is the number of function calls made.
#         * d['nit'] is the number of iterations.

##### Example to illustrate fmin_l_bfgs_b

In [45]:
def y(x):
    """Toy objective x**2 - 2*x + 3, used to demonstrate fmin_l_bfgs_b."""
    quadratic_term = x**2
    linear_term = 2*x
    return quadratic_term - linear_term + 3

In [46]:
def y_prime(x):
    """Derivative of the toy objective: 2*x - 2 (zero at x = 1)."""
    doubled = 2*x
    return doubled - 2

In [47]:
fmin_l_bfgs_b(y, 0, fprime=y_prime)

(array([1.]),
 array([2.]),
 {'grad': array([0.]),
  'task': 'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL',
  'funcalls': 2,
  'nit': 1,
  'warnflag': 0})

##### Continue Our Example

In [26]:
# Each round runs L-BFGS for at most 20 function evaluations, starting from
# the result of the previous round, then saves the current image.
for i in range(iterations):
    print('Start of iteration', i)
    start_time = time.time()
    x, min_val, info = fmin_l_bfgs_b(evaluator.loss, # the function to minimize
                                     x, # initial guess (flat image vector)
                                     fprime=evaluator.grads, # the function's gradient
                                     maxfun=20)
    print('Current loss value:', min_val)
    # Copy before reshaping so deprocess_image cannot modify the optimizer state x.
    img = x.copy().reshape((img_height, img_width, 3))
    img = deprocess_image(img)
    fname = result_prefix + '_at_iteration_%d.png' % i
    
    # Save the image produced by this iteration under the name `fname`.
    save_img(fname, img)
    print('Image saved as', fname)
    end_time = time.time()
    print('Iteration %d completed in %ds' % (i, end_time - start_time))

Start of iteration 0
Current loss value: 5671027000.0
Image saved as my_result_at_iteration_0.png
Iteration 0 completed in 89s
Start of iteration 1
Current loss value: 1736448000.0
Image saved as my_result_at_iteration_1.png
Iteration 1 completed in 82s
Start of iteration 2
Current loss value: 979843400.0
Image saved as my_result_at_iteration_2.png
Iteration 2 completed in 78s
Start of iteration 3
Current loss value: 711555700.0
Image saved as my_result_at_iteration_3.png
Iteration 3 completed in 82s
Start of iteration 4
Current loss value: 562097800.0
Image saved as my_result_at_iteration_4.png
Iteration 4 completed in 81s
Start of iteration 5
Current loss value: 482284100.0
Image saved as my_result_at_iteration_5.png
Iteration 5 completed in 83s
Start of iteration 6
Current loss value: 423983520.0
Image saved as my_result_at_iteration_6.png
Iteration 6 completed in 83s
Start of iteration 7
Current loss value: 376935360.0
Image saved as my_result_at_iteration_7.png
Iteration 7 complet