# Computer Vision - Transfer learning



In [None]:
import numpy as np
# import time

import tensorflow as tf

from keras import backend as K
from keras.preprocessing.image import load_img, save_img, img_to_array
from keras.applications.imagenet_utils import decode_predictions
from keras.applications import vgg16, vgg19, resnet50

import matplotlib.pyplot as plt
%matplotlib inline

from mymods.lauthom import *

### Image preprocessing

In [None]:
# Content image: the picture whose content is kept in the generated image.
base_image_path = '../data/ae_images/louvre_small.jpg'
# Style image: the picture whose style is imposed on the generated image.
style_reference_image_path = '../data/ae_images/monet.jpg'

In [None]:
# dimensions of the generated picture.
# The height is fixed at 400 rows; the number of columns is scaled from the
# base image's (width, height) so the aspect ratio is kept (truncated to int).
width, height = load_img(base_image_path).size
img_nrows = 400
img_ncols = int(width * img_nrows / height)

#### Pre- and de- process images

In [None]:
def preprocess_image(image_path):
    """Load an image file and turn it into a VGG19-ready batch of one.

    The picture at ``image_path`` is opened and resized to
    ``(img_nrows, img_ncols)``, converted to an array, given a leading
    batch axis and finally passed through ``vgg19.preprocess_input``.
    """
    resized = load_img(image_path, target_size=(img_nrows, img_ncols))
    # Leading axis of length 1 = batch dimension
    batch = np.expand_dims(img_to_array(resized), axis=0)
    return vgg19.preprocess_input(batch)

def deprocess_image(x):
    """Convert a preprocessed image array back into a displayable image.

    Adds the per-channel mean pixel back, flips the channel order from BGR
    to RGB and clips the result to the valid 8-bit range.

    The input is never modified: all arithmetic is done on a float copy, so
    callers do not have to pass a copy themselves and integer-typed input is
    accepted as well.

    Parameters
    ----------
    x : numpy.ndarray
        Array holding ``3 * img_nrows * img_ncols`` values, laid out in the
        backend's image data format (e.g. the flat vector from the optimiser).

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(img_nrows, img_ncols, 3)``.
    """
    if K.image_data_format() == 'channels_first':
        x = x.reshape((3, img_nrows, img_ncols))
        x = x.transpose((1, 2, 0))
    else:
        x = x.reshape((img_nrows, img_ncols, 3))
    # reshape/transpose can return views of the caller's buffer; astype
    # always allocates, so the in-place additions below stay local.
    x = x.astype('float64')
    # Remove zero-center by mean pixel
    x[:, :, 0] += 103.939
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68
    # 'BGR'->'RGB'
    x = x[:, :, ::-1]
    x = np.clip(x, 0, 255).astype('uint8')
    return x

#### Show images

In [None]:
# Display the base (content) image first, then the style reference image
for _shown_path in (base_image_path, style_reference_image_path):
    _ = plt.imshow(plt.imread(_shown_path))
    _ = plt.show()

#### Content and style image tensors

In [None]:
# Preprocess both source pictures and wrap them as backend variables so they
# can be concatenated with the generated-image placeholder.
base_image = K.variable(preprocess_image(base_image_path))
style_reference_image = K.variable(preprocess_image(style_reference_image_path))

#### Generated image placeholder

In [None]:
# Placeholder for the generated image: a batch of one 3-channel picture,
# with the channel axis placed according to the backend's image data format.
combination_image = K.placeholder(
    (1, 3, img_nrows, img_ncols)
    if K.image_data_format() == 'channels_first'
    else (1, img_nrows, img_ncols, 3)
)

#### Combine the 3 images into a single Keras tensor

In [None]:
# Stack the three images along the batch axis. Resulting batch order:
# 0 = base (content) image, 1 = style reference image, 2 = generated image.
input_tensor = K.concatenate([base_image,
                              style_reference_image,
                              combination_image], axis=0)

## Build the VGG19 network with our 3 images as input

The model will be loaded with pre-trained ImageNet weights.

In [None]:
# Build VGG19 directly on top of the 3-image batch, using the pre-trained
# ImageNet weights and leaving out the top of the network (include_top=False).
model = vgg19.VGG19(input_tensor=input_tensor,
                    weights='imagenet', 
                    include_top=False)

print('Model loaded')

In [None]:
# Map every layer name to that layer's symbolic output tensor
# (the layer names are unique, so they can serve as dictionary keys).
outputs_dict = {layer.name: layer.output for layer in model.layers}

In [None]:
# Inspect the layer-name -> output-tensor mapping.
# NOTE(review): dictify is not defined in this file; presumably it comes from
# the star import of mymods.lauthom — confirm what it displays/returns.
dictify(outputs_dict)

## Cost functions and weights

### Style weights

To compute the neural style loss, we first need to define 4 util functions:

- gram_matrix: feature-wise outer product
- style_loss: maintain the "style" of the reference image in the generated image
- content_loss: maintain the "content" of the base image in the generated image
- total_variation_loss: keep the generated image locally coherent

In [None]:
def gram_matrix(x):
    """Gram matrix (feature-wise outer product) of a single feature map.

    The tensor is arranged channels-first, flattened to one row per channel
    and multiplied with its own transpose. The result is used to capture the
    style of an image.
    """
    # Only a single image is supported, not a batch
    assert K.ndim(x) == 3

    # Bring the channel axis to the front unless it is already there
    if K.image_data_format() != 'channels_first':
        x = K.permute_dimensions(x, (2, 0, 1))

    features = K.batch_flatten(x)
    return K.dot(features, K.transpose(features))

#### Style loss

The style loss is designed to maintain the style of the reference image in the generated image. It is based on the gram matrices (which capture style) of feature maps from the style reference image and from the generated image.

In [None]:
def style_loss(style, combination):
    """Style distance between the style reference and the generated image.

    Sum of squared differences between the Gram matrices of ``style`` and
    ``combination``, divided by ``4 * channels**2 * size**2`` where
    ``channels`` is 3 and ``size`` is ``img_nrows * img_ncols``.
    """
    # Both arguments must be single feature maps, not batches
    for feature_map in (style, combination):
        assert K.ndim(feature_map) == 3

    gram_difference = gram_matrix(style) - gram_matrix(combination)

    channels = 3
    size = img_nrows * img_ncols
    normaliser = 4. * (channels**2) * (size**2)
    return K.sum(K.square(gram_difference)) / normaliser

#### Content loss

In [None]:
def content_loss(base, combination):
    """Content distance: sum of squared differences between the feature
    maps of the base image and of the generated image."""
    difference = base - combination
    return K.sum(K.square(difference))

#### Total variation loss 

In [None]:
def total_variation_loss(x, total_variation_weight=.025):
    """Weighted total variation of a batch of images.

    Penalises differences between each pixel and its neighbour one row down
    and one column to the right, which keeps the generated image locally
    coherent. The summed penalty is scaled by ``total_variation_weight``.
    """
    # Expects a batch of images
    assert K.ndim(x) == 4

    last_row, last_col = img_nrows - 1, img_ncols - 1

    # Reference window plus the same window shifted by one row / one column
    if K.image_data_format() == 'channels_first':
        anchor = x[:, :, :last_row, :last_col]
        shifted_rows = x[:, :, 1:, :last_col]
        shifted_cols = x[:, :, :last_row, 1:]
    else:
        anchor = x[:, :last_row, :last_col, :]
        shifted_rows = x[:, 1:, :last_col, :]
        shifted_cols = x[:, :last_row, 1:, :]

    row_term = K.square(anchor - shifted_rows)
    col_term = K.square(anchor - shifted_cols)
    return K.sum(K.pow(row_term + col_term, 1.25)) * total_variation_weight

### Combine loss functions into a single loss scalar

In [None]:
# Weights of the different loss components
# (multipliers applied to the style and content terms of the total loss)
style_weight = 1.
content_weight = 1.

In [None]:
# Initialise loss variable
# Scalar backend variable starting at 0; the individual loss terms are
# added to it further down.
loss = K.variable(0.)

In [None]:
# Content loss
# block5_conv2 activations for the whole 3-image batch
layer_outputs = outputs_dict['block5_conv2']
base_image_output = layer_outputs[0, ...]   # batch index 0: base (content) image
combination_output = layer_outputs[2, ...]  # batch index 2: generated image

# Rebind instead of `loss += ...`: in-place addition on a backend variable
# triggers TensorFlow's "Variable += will be deprecated" warning, whereas
# `loss = loss + ...` builds the same sum as a new tensor.
loss = loss + content_weight * content_loss(base_image_output, combination_output)

In [None]:
# Weighted style loss added to content loss
# Layers whose activations are compared for style
output_layers = [
    'block1_conv1',
    'block2_conv1',
    'block3_conv1',
    'block4_conv1',
    'block5_conv1',
]

# Every layer contributes equally; amend here for per-layer weights
for layer_name in output_layers:
    layer_outputs = outputs_dict[layer_name]
    # batch index 1: style reference image, batch index 2: generated image
    style_output, combination_output = layer_outputs[1, ...], layer_outputs[2, ...]

    sl = style_loss(style_output, combination_output)
    loss = loss + (sl * style_weight / len(output_layers))

In [None]:
# Add the total variation term of the generated image to the running loss
# (its weight is applied inside total_variation_loss via the default argument)
loss = loss + total_variation_loss(combination_image)

In [None]:
# loss  # <tf.Tensor 'add_7:0' shape=() dtype=float32>

#### L-BFGS optimisation

L-BFGS optimisation is a Quasi-Newton method

Basically think of L-BFGS as a way of finding a (local) minimum of an objective function, making use of objective function values and the gradient of the objective function. That level of description covers many optimization methods in addition to L-BFGS though. 

You can read more about it in section 7.2 of Nocedal and Wright "Numerical Optimization, 2nd edition" http://www.springer.com/us/book/9780387303031 . A very cursory discussion of L-BFGS is provided at https://en.wikipedia.org/wiki/Limited-memory_BFGS .

First order method means gradients (first derivatives) (and maybe objective function values) are used, but not Hessian (second derivatives). Think of, for instance, gradient descent and steepest descent, among many others.

Second order method means gradients and Hessian are used (and maybe objective function values). Second order methods can be either based on

 - "Exact" Hessian matrix (or finite differences of gradients), in which case they are known as Newton methods 
 or
 - Quasi-Newton methods, which approximate the Hessian based on differences of gradients over several iterations, by imposing a "secant" (Quasi-Newton) condition. There are many different Quasi-Newton methods, which estimate the Hessian in different ways. One of the most popular is BFGS.
 
The BFGS Hessian approximation can either be based on the full history of gradients, in which case it is referred to as BFGS, or it can be based only on the most recent m gradients, in which case it is known as limited memory BFGS, abbreviated as L-BFGS. The advantage of L-BFGS is that it requires only retaining the most recent m gradients, where m is usually around 10 to 20, which is a much smaller storage requirement than the n*(n+1)/2 elements required to store the full (triangle) of a Hessian estimate, as is required with BFGS, where n is the problem dimension. Unlike (full) BFGS, the estimate of the Hessian is never explicitly formed or stored in L-BFGS; rather, the calculations which would be required with the estimate of the Hessian are accomplished without explicitly forming it. L-BFGS is used instead of BFGS for very large problems (when n is very large), but might not perform as well as BFGS. Therefore, BFGS is preferred over L-BFGS when the memory requirements of BFGS can be met. On the other hand, L-BFGS may not be much worse in performance than BFGS.

#### Gradients of the loss wrt the generated image

In [None]:
# Build the list of tensors to evaluate in one pass: [loss, gradient(s)]
# Loss is scalar
outputs = [loss]

# Gradient(s) of the loss with respect to the generated image
grads = K.gradients(loss, combination_image)

# Append the gradient(s) after the loss, whether the backend returned a
# sequence or a single tensor; n_grads records how many were appended
if isinstance(grads, (list, tuple)):
    outputs += grads
    n_grads = len(grads)
else:
    outputs.append(grads)
    n_grads = 1

# K.function(inputs, outputs) builds a callable that evaluates `outputs`
# for a concrete value of the generated image
# https://keras.io/getting-started/faq/#how-can-i-obtain-the-output-of-an-intermediate-layer
f_outputs = K.function([combination_image], outputs)


def reshape_img(img):
    """Reshape ``img`` into a batch of one image in the backend's data format."""
    if K.image_data_format() == 'channels_first':
        batch_shape = (1, 3, img_nrows, img_ncols)
    else:
        batch_shape = (1, img_nrows, img_ncols, 3)
    return img.reshape(batch_shape)


def eval_loss_and_grads(img):
    """Evaluate the loss and its gradient(s) for the image ``img``.

    Returns a ``(loss_value, grad_values)`` pair in which ``grad_values``
    is a flat ``float64`` array.
    """
    # One evaluation yields the loss followed by the gradient array(s)
    loss_value, *grad_outs = f_outputs([reshape_img(img)])

    # A single gradient array is flattened directly; several are stacked first
    if len(grad_outs) == 1:
        raw_grads = grad_outs[0]
    else:
        raw_grads = np.array(grad_outs)
    grad_values = raw_grads.flatten().astype('float64')
    return loss_value, grad_values

In [None]:
class Evaluator(object):
    """Compute loss and gradients in one pass,
    while retrieving them via two separate functions.

    The optimiser is handed one callable for the loss and another for the
    gradients. ``loss`` evaluates both and caches the gradients; the
    following ``grads`` call returns the cached gradients and clears the
    cache. The two methods must therefore be called alternately, ``loss``
    first.
    """
    def __init__(self):
        # Cached results of the most recent loss() call (None = nothing cached)
        self.loss_value = None
        self.grad_values = None

    def loss(self, img):
        """Compute loss and gradients for ``img``, cache both, return the loss."""
        assert self.loss_value is None, 'grads() must be called before the next loss()'
        self.loss_value, self.grad_values = eval_loss_and_grads(img)
        return self.loss_value

    def grads(self, x):
        """Return a copy of the cached gradients and clear the cache.

        ``x`` is accepted only to match the optimiser's gradient-callable
        signature; the gradients returned are those cached by ``loss``.
        """
        assert self.loss_value is not None, 'loss() must be called before grads()'
        grad_values = np.copy(self.grad_values)
        self.loss_value = None
        self.grad_values = None
        return grad_values

In [None]:
# Single evaluator instance; its loss and grads methods are the callables
# handed to the optimiser.
evaluator = Evaluator()

### Add TensorBoard callback to model

In [None]:
# Use Tensorboard
from keras.callbacks import TensorBoard

In [None]:
# https://keras.io/callbacks/
# TensorBoard callback logging to ./logs (graph and images enabled,
# histograms disabled), attached directly to the VGG19 model.
# NOTE(review): nothing in this file passes tb_callback to a fit() call;
# presumably set_model is used here to get the graph written — confirm.
tb_callback = TensorBoard(
    log_dir='./logs',
    histogram_freq=0,
    write_graph=True,
    write_images=True
)
tb_callback.set_model(model)

### Run model

In [None]:
import os

from scipy.optimize import fmin_l_bfgs_b

# Run L-BFGS optimization over the generated image to minimize the neural style loss
# The optimisation starts from the preprocessed base (content) image
proc_img = preprocess_image(base_image_path)

# Effect = iterations x evaluations
iterations = 30  # frequency of intermediate results
evaluations = 5  # every 5 evaluations output result (5x5 = 9minutes, 1x30 = 9minutes)
stopwatch = Timer()

# Create the output directory up front: saving into a missing directory would
# otherwise fail only after the first (slow) optimiser run has finished.
output_dir = './output'
os.makedirs(output_dir, exist_ok=True)

# Run optimizer,
# the result of each run is the starting point of the next one
for i in range(iterations):
    proc_img, min_val, info = fmin_l_bfgs_b(evaluator.loss,  # callable minimizing function
                                     proc_img.flatten(),     # initial guess
                                     fprime=evaluator.grads, # gradients of above minimizing function
                                     maxfun=evaluations)     # max. # evaluations

    print(f'Iteration: {i:2} Current loss value: {min_val:>12.0f} {stopwatch()}')

    # save current generated image (deprocess a copy so proc_img stays intact)
    img = deprocess_image(proc_img.copy())
    fname = f'{output_dir}/image_at_iteration_{i}.png'
    save_img(fname, img)
    print(f'Image saved as {fname}')


## Show tensorboard graph _TODO

https://stackoverflow.com/questions/37128652/creating-log-directory-in-tensorboard


In [None]:
def TB(cleanup=False):
    """Open the TensorBoard page in a browser and start TensorBoard on ./logs.

    When ``cleanup`` is true, the ``logs/`` directory is removed after the
    ``tensorboard`` command returns. The ``!`` lines are IPython shell
    escapes, so this function only works inside IPython/Jupyter.
    """
    # https://stackoverflow.com/questions/38189119/simple-way-to-visualize-a-tensorflow-graph-in-jupyter
    import webbrowser
    # NOTE(review): the page is opened before the server below is started,
    # so the first load may fail until TensorBoard is up — confirm.
    webbrowser.open('http://127.0.1.1:6006')

    # Shell escape: serve the logs written to ./logs
    !tensorboard --logdir=./logs

    if cleanup:
        # Shell escape: delete the log directory
        !rm -R logs/

In [None]:
# Launch TensorBoard; cleanup defaults to False, so the logs are kept
TB()