# Computer Vision - Transfer learning

Image preprocessing

In [None]:
import numpy as np
from scipy.optimize import fmin_l_bfgs_b
import time

import tensorflow as tf

from keras import backend as K
from keras.preprocessing.image import load_img, save_img, img_to_array
from keras.applications.imagenet_utils import decode_predictions
from keras.applications import vgg16, vgg19, resnet50

import matplotlib.pyplot as plt
%matplotlib inline

from mymods.lauthom import *

### Load image(s) in PIL format

In [None]:
# Load the sample photo from disk, asking load_img for a 224x224 result.
filename = '../data/ae_images/louvre_small.jpg'
image = load_img(filename, target_size=(224, 224))
# Bare expression in the middle of a cell: it is not the cell's last
# statement, so nothing is displayed for it.
image

print('PIL image size:', image.size)
# The return values are bound to `_` so they are not echoed as cell output.
_ = plt.imshow(image)
_ = plt.show()

### Convert PIL image to numpy array

 - PIL `size`: (width, height)
 - numpy `shape`: (height, width, channels)

In [None]:
# Convert the PIL image loaded above into a numpy array.
np_img = img_to_array(image)
# The array is cast to uint8 before being handed to imshow.
plt.imshow(np.uint8(np_img))
plt.show()
print('numpy array size', np_img.shape)

### Add batch dimension

`expand_dims()` adds an extra dimension to the data at a given axis.  
The network expects its input in the form (batch size, height, width, channels),  
so the extra dimension is added at axis 0.

In [None]:
# Prepend a batch axis so the single image becomes a batch of one.
batch_img = np.expand_dims(np_img, axis=0)
print('image batch size', batch_img.shape)
# Plot the first (and only) image of the batch as a sanity check.
_ = plt.imshow(np.uint8(batch_img[0]))

### Load pretrained model(s)

In [None]:
# ResNet50 with its default constructor arguments. It is created through the
# same tf.keras namespace as the VGG16 model below; the tf.contrib.keras
# alias used previously is deprecated.
resnet = tf.keras.applications.ResNet50()

# VGG16 with every constructor argument spelled out: full classifier head
# (include_top=True), ImageNet weights and the 1000 ImageNet classes.
vgg = tf.keras.applications.VGG16(
    include_top=True,
    weights='imagenet',
    input_tensor=None,
    input_shape=None, #(224, 224, 3),
    pooling=None,
    classes=1000
)

### Predict/classify image(s)

The ImageNet models output one probability for each of the 1,000 ImageNet classes.   
To interpret the result, we can use another helpful function: `decode_predictions()`. By default it returns only the top 5 class labels together with their predicted probabilities.

In [None]:
# The pretrained weights expect input that went through the model's own
# preprocess_input(); a copy is passed so batch_img itself stays untouched.
probabilities = resnet.predict(resnet50.preprocess_input(batch_img.copy()))
decode_predictions(probabilities)

In [None]:
# The pretrained weights expect input that went through the model's own
# preprocess_input(); a copy is passed so batch_img itself stays untouched.
probabilities = vgg.predict(vgg16.preprocess_input(batch_img.copy()))
decode_predictions(probabilities)

In [None]:
def predict_labels(model, image_batch, preprocess=None):
    """Classify a batch of images with a pretrained model and decode the result.

    Args:
        model: object exposing ``predict(batch)``.
        image_batch: batch of images. The function works on a copy, so the
            caller's array is never modified.
        preprocess: optional callable applied to the copied batch before
            prediction, e.g. the ``preprocess_input`` function that belongs
            to the model. When None (the default) the batch is fed to the
            model as it is, which is the previous behaviour.

    Returns:
        Whatever ``decode_predictions`` returns for the model output.
    """
    batch = image_batch.copy()
    if preprocess is not None:
        batch = preprocess(batch)
    return decode_predictions(model.predict(batch))

In [None]:
# Classify the batch with VGG16 through the predict_labels helper.
predict_labels(vgg, batch_img)

### Keras preprocessing

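`preprocess_input()` converts an image batch into the format the pretrained network was trained on. For VGG, `deprocess_image()` further down shows the inverse: it adds the mean pixel back per channel and flips BGR back to RGB.

TODO: find out how much it affects the predictions.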

In [None]:
processed_image = vgg16.preprocess_input(batch_img.copy()) # use copy to avoid cumulative mutations
print('image batch sizes:', batch_img.shape, processed_image.shape)
# First plot: the untouched input image.
_ = plt.imshow(np.uint8(batch_img[0]))
_ = plt.show()

# Second plot: the preprocessed image cast to uint8.
# NOTE(review): preprocessed values presumably fall outside 0-255, so the cast
# may wrap around; confirm that this picture is only meant as an illustration.
_ = plt.imshow(np.uint8(processed_image[0]))
_ = plt.show()

# NST with Keras

### Configuration and image helpers

In [None]:
# Filename prefix of the images saved by the optimisation loop, and the
# number of rounds that loop runs.
result_prefix = 'result'
iterations = 10

In [None]:
# Content (base) picture and the picture used as the style reference.
base_image_path = '../data/ae_images/louvre_small.jpg'
style_reference_image_path = '../data/ae_images/monet.jpg'

In [None]:
# dimensions of the generated picture.
# The row count is fixed at 400; the column count is derived from it using the
# base image's width/height ratio.
width, height = load_img(base_image_path).size
img_nrows = 400
img_ncols = int(width * img_nrows / height)

In [None]:
# these are the weights of the different loss components
# (each one multiplies its loss term where the total loss is assembled)
total_variation_weight = .025
style_weight = 1.
content_weight = 1.

In [None]:
def preprocess_image(image_path):
    """Load a picture from disk and turn it into a VGG19-ready batch of one.

    The file is loaded with target size (img_nrows, img_ncols), converted to
    an array, given a leading batch axis and finally passed through
    vgg19.preprocess_input.
    """
    pil_image = load_img(image_path, target_size=(img_nrows, img_ncols))
    batch = np.expand_dims(img_to_array(pil_image), axis=0)
    return vgg19.preprocess_input(batch)

def deprocess_image(x):
    """Turn a preprocessed image tensor back into a displayable uint8 image.

    The input is reshaped to (img_nrows, img_ncols, 3) (moving the channel
    axis last when the backend is 'channels_first'), the mean pixel is added
    back per channel, the channel order is flipped from BGR to RGB and the
    result is clipped to 0-255. The mean is added in place on the reshaped
    array.
    """
    if K.image_data_format() == 'channels_first':
        # (3, rows, cols) -> (rows, cols, 3)
        x = x.reshape((3, img_nrows, img_ncols)).transpose((1, 2, 0))
    else:
        x = x.reshape((img_nrows, img_ncols, 3))
    # Undo the zero-centering: add the mean pixel back, channel by channel.
    for channel, mean_pixel in enumerate((103.939, 116.779, 123.68)):
        x[:, :, channel] += mean_pixel
    # Reverse the channel axis: 'BGR' -> 'RGB'.
    rgb = x[:, :, ::-1]
    return np.clip(rgb, 0, 255).astype('uint8')

In [None]:
# get tensor representations of our images
base_image = K.variable(preprocess_image(base_image_path))
style_reference_image = K.variable(preprocess_image(style_reference_image_path))

# this will contain our generated image
if K.image_data_format() == 'channels_first':
    combination_image = K.placeholder((1, 3, img_nrows, img_ncols))
else:
    combination_image = K.placeholder((1, img_nrows, img_ncols, 3))

# combine the 3 images into a single Keras tensor
input_tensor = K.concatenate([base_image,
                              style_reference_image,
                              combination_image], axis=0)

# build the VGG16 network with our 3 images as input
# the model will be loaded with pre-trained ImageNet weights
model = vgg19.VGG19(input_tensor=input_tensor,
                    weights='imagenet', include_top=False)
print('Model loaded.')

# get the symbolic outputs of each "key" layer (we gave them unique names).
outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])

In [None]:
# Display the mapping from layer name to that layer's symbolic output.
outputs_dict

To compute the neural style loss, we first need to define 4 util functions:

- gram_matrix: feature-wise outer product
- style_loss: maintain the "style" of the reference image in the generated image
- content_loss: maintain the "content" of the base image in the generated image
- total_variation_loss: keep the generated image locally coherent

In [None]:
def gram_matrix(x):
    """Gram matrix (feature-wise outer product) of a rank-3 feature tensor.

    The tensor is brought into channel-first order when the backend uses
    another layout, flattened to one row per channel, and multiplied by its
    own transpose. The result is what the style loss compares.
    """
    assert K.ndim(x) == 3

    if K.image_data_format() != 'channels_first':
        # Move the channel axis to the front.
        x = K.permute_dimensions(x, (2, 0, 1))
    features = K.batch_flatten(x)
    return K.dot(features, K.transpose(features))

The "style loss" is designed to maintain the style of the reference image in the generated image. It is based on the Gram matrices (which capture style) of feature maps from the style reference image and from the generated image.

In [None]:
def style_loss(style, combination):
    """Style term: squared distance between the two Gram matrices.

    Both arguments must be rank-3 feature tensors. The summed squared
    difference of their Gram matrices is divided by
    4 * channels**2 * size**2, with channels fixed at 3 and size equal to
    img_nrows * img_ncols.
    """
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3
    gram_difference = gram_matrix(style) - gram_matrix(combination)
    channels = 3
    size = img_nrows * img_ncols
    normaliser = 4. * (channels ** 2) * (size ** 2)
    return K.sum(K.square(gram_difference)) / normaliser

The content loss is an auxiliary loss function designed to maintain the "content" of the base image in the generated image.

In [None]:
def content_loss(base, combination):
    """Content term: sum of squared differences between the generated
    image's features and the base image's features."""
    difference = combination - base
    return K.sum(K.square(difference))

The total variation loss is designed to keep the generated image locally coherent.

In [None]:
def total_variation_loss(x):
    """Total-variation term that keeps the generated image locally coherent.

    For every pixel except those in the last row and column, the squared
    difference to the pixel one row down and to the pixel one column right
    are added, raised to the power 1.25 and summed. x must be a rank-4 batch
    laid out according to the backend's image data format.
    """
    assert K.ndim(x) == 4
    rows, cols = img_nrows - 1, img_ncols - 1
    if K.image_data_format() == 'channels_first':
        anchor = x[:, :, :rows, :cols]
        below = x[:, :, 1:, :cols]
        right = x[:, :, :rows, 1:]
    else:
        anchor = x[:, :rows, :cols, :]
        below = x[:, 1:, :cols, :]
        right = x[:, :rows, 1:, :]

    vertical = K.square(anchor - below)
    horizontal = K.square(anchor - right)
    return K.sum(K.pow(vertical + horizontal, 1.25))

Combine these loss functions into a single scalar.

In [None]:
# Total loss, accumulated term by term. `loss = loss + term` is used instead
# of `loss += term`: in-place `+=` on a backend variable is deprecated
# ("Variable += will be deprecated") and building a new tensor is the
# replacement that keeps the same value.
loss = K.variable(0.)

# Content term: compare the 'block5_conv2' features of the base image
# (batch index 0) with those of the generated image (batch index 2).
layer_features = outputs_dict['block5_conv2']
base_image_features = layer_features[0, ...]
combination_features = layer_features[2, ...]
loss = loss + content_weight * content_loss(base_image_features,
                                            combination_features)

# Style term: averaged over the first convolution of each of the five blocks,
# comparing the style reference (batch index 1) with the generated image.
feature_layers = ['block1_conv1', 'block2_conv1','block3_conv1', 'block4_conv1',
                  'block5_conv1']

for layer_name in feature_layers:
    layer_features = outputs_dict[layer_name]
    style_reference_features = layer_features[1, ...]
    combination_features = layer_features[2, ...]
    sl = style_loss(style_reference_features, combination_features)
    loss = loss + (style_weight / len(feature_layers)) * sl

# Total-variation term, computed on the generated image itself.
loss = loss + total_variation_weight * total_variation_loss(combination_image)

Get the gradients of the loss with respect to the generated image.

In [None]:
# Symbolic gradient(s) of the total loss with respect to the generated image.
grads = K.gradients(loss, combination_image)

# Output list of the backend function: the loss first, followed by the
# gradient tensor(s). K.gradients may hand back one tensor or a list/tuple.
outputs = [loss] + (list(grads) if isinstance(grads, (list, tuple)) else [grads])

# A single callable that evaluates loss and gradients for a given image.
f_outputs = K.function([combination_image], outputs)

def eval_loss_and_grads(x):
    """Evaluate the loss and its gradient for one candidate image.

    x is reshaped into a batch of one image (axis order according to the
    backend's image data format) and fed to f_outputs. Returns a tuple
    (loss_value, grad_values) where grad_values is a flat float64 array.
    """
    channels_first = K.image_data_format() == 'channels_first'
    if channels_first:
        batch_shape = (1, 3, img_nrows, img_ncols)
    else:
        batch_shape = (1, img_nrows, img_ncols, 3)

    outs = f_outputs([x.reshape(batch_shape)])
    gradients = outs[1:]
    # A single gradient output is used directly; several are stacked first.
    stacked = outs[1] if len(gradients) == 1 else np.array(gradients)
    return outs[0], stacked.flatten().astype('float64')

Evaluator class

In [None]:
# this Evaluator class makes it possible
# to compute loss and gradients in one pass
# while retrieving them via two separate functions,
# "loss" and "grads". This is done because scipy.optimize
# requires separate functions for loss and gradients,
# but computing them separately would be inefficient.


class Evaluator(object):
    """Caches one loss/gradient evaluation so scipy can fetch both parts.

    ``loss(x)`` runs ``eval_loss_and_grads`` once and stores both results;
    the following ``grads(x)`` call returns the stored gradient and clears
    the cache. The two methods must therefore be called alternately, loss
    first.
    """

    def __init__(self):
        self.loss_value = None
        # Same attribute name as the one written by loss() and read by
        # grads(); it was previously initialised under a misspelled name.
        self.grad_values = None

    def loss(self, x):
        """Evaluate loss and gradient at x, cache both, return the loss."""
        # A pending, unconsumed evaluation means grads() was skipped.
        assert self.loss_value is None
        loss_value, grad_values = eval_loss_and_grads(x)
        self.loss_value = loss_value
        self.grad_values = grad_values
        return self.loss_value

    def grads(self, x):
        """Return a copy of the gradient cached by the preceding loss() call.

        x is not used: the gradient belongs to the point loss() was called with.
        """
        assert self.loss_value is not None
        grad_values = np.copy(self.grad_values)
        # Reset the cache so the next loss() call is accepted.
        self.loss_value = None
        self.grad_values = None
        return grad_values

evaluator = Evaluator()

scipy-based optimization (L-BFGS)

In [None]:
# run scipy-based optimization (L-BFGS) over the pixels of the generated image
# so as to minimize the neural style loss
# The search starts from the preprocessed base (content) image.
x = preprocess_image(base_image_path)

for i in range(iterations):
    print('Start of iteration', i)
    start_time = time.time()
    # Each round continues from the previous result (x is reassigned) and is
    # limited by maxfun=20. The evaluator supplies the loss and the gradient
    # from a single underlying evaluation.
    x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(),
                                     fprime=evaluator.grads, maxfun=20)
    print('Current loss value:', min_val)
    
    # save current generated image
    # deprocess_image adds the mean pixel in place, so it is given a copy and
    # the optimiser state in x stays untouched.
    img = deprocess_image(x.copy())
    fname = result_prefix + '_at_iteration_%d.png' % i
    save_img(fname, img)
    end_time = time.time()
    print('Image saved as', fname)
    print('Iteration %d completed in %ds' % (i, end_time - start_time))

In [None]:
# Scratch experiment: assign [4, 5] to an integer variable.
import tensorflow as tf
zero_tsr = tf.Variable([0,0])
# NOTE(review): under graph-mode TensorFlow this presumably only builds the
# assign op; nothing in this cell runs it in a session. Confirm the intent.
tf.assign(zero_tsr, [4,5])

In [None]:

# NOTE(review): zero_tsr is a placeholder here, not a variable, so the assign
# below is presumably expected to fail. Confirm whether this cell is a
# deliberate demonstration of that.
zero_tsr = tf.placeholder(tf.float32, name='my_original_image')
tf.assign(zero_tsr, [4,5])

In [None]:
from IPython.display import display, HTML

def strip_consts(graph_def, max_const_size=32):
    """Return a copy of graph_def with large constant payloads replaced by a stub.

    Args:
        graph_def: graph definition whose nodes are copied one by one.
        max_const_size: 'Const' nodes whose tensor_content is longer than
            this many bytes get their content replaced by a short marker.

    Returns:
        A new tf.GraphDef; the nodes of graph_def itself are not modified.
    """
    strip_def = tf.GraphDef()
    for original_node in graph_def.node:
        node = strip_def.node.add()
        node.MergeFrom(original_node)
        if node.op != 'Const':
            continue
        tensor = node.attr['value'].tensor
        size = len(tensor.tensor_content)
        if size > max_const_size:
            # tensor_content is a bytes field: under Python 3 it rejects str,
            # so the marker text is encoded before it is assigned.
            tensor.tensor_content = ("<stripped %d bytes>" % size).encode('utf-8')
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Render a TensorFlow graph inline in the notebook.

    Accepts either a graph definition or any object offering as_graph_def().
    Large constants are stripped with strip_consts (threshold
    max_const_size), the result is embedded in a small HTML page that loads
    the tf-graph-basic component, and that page is displayed inside an iframe.
    """
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    stripped = strip_consts(graph_def, max_const_size=max_const_size)

    graph_text = repr(str(stripped))
    # A random suffix gives every rendered graph its own element id.
    element_id = 'graph' + str(np.random.rand())
    code = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=graph_text, id=element_id)

    # The page goes into the iframe's srcdoc attribute, so its double quotes
    # have to be escaped.
    escaped = code.replace('"', '&quot;')
    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(escaped)
    display(HTML(iframe))

In [None]:
# The model objects are bound to `vgg19_model`, not `vgg19`: the latter is the
# keras.applications module imported at the top of the notebook, and rebinding
# it would break any later re-run of cells calling vgg19.preprocess_input or
# vgg19.VGG19.

# Variant 1: create the model on its own, then call it on a placeholder.
keras_way_graph = tf.Graph()
with keras_way_graph.as_default(), tf.Session().as_default():
    input_img = tf.placeholder(tf.float32, (1,224,224,3), name='input_image')
    vgg19_model = tf.keras.applications.VGG19(weights='imagenet', include_top=False)
    output = vgg19_model(input_img)

# Variant 2: hand the placeholder to the constructor as input_tensor and
# expose the last layer's output under the name 'output'.
right_way_graph = tf.Graph()
with right_way_graph.as_default(), tf.Session().as_default():
    input_img = tf.placeholder(tf.float32, (1,224,224,3), name='input_image')
    vgg19_model = tf.keras.applications.VGG19(weights='imagenet', include_top=False, input_tensor=input_img)
    output = tf.identity(vgg19_model.layers[-1].output, name='output')


In [None]:
# Render the graph in which the VGG19 model was called on the placeholder.
show_graph(keras_way_graph.as_graph_def())


In [None]:
# Render the graph in which the placeholder was passed as input_tensor.
show_graph(right_way_graph.as_graph_def())

In [None]:
def TB(cleanup=False):
    """Open the TensorBoard page in a browser and start TensorBoard on "logs".

    Args:
        cleanup: when True, `rm -R logs/` is run after the tensorboard
            command has returned.
    """
    import webbrowser
    # The browser tab is opened before the server is launched below.
    webbrowser.open('http://127.0.1.1:6006')

    # IPython shell escape: runs the tensorboard command line tool.
    # NOTE(review): this presumably blocks the cell until tensorboard is
    # stopped; confirm.
    !tensorboard --logdir="logs"

    if cleanup:
        # Shell escape that deletes the log directory recursively.
        !rm -R logs/

In [None]:
# Launch TensorBoard; the logs are kept because cleanup defaults to False.
TB()