# การสร้างงานศิลปะด้วย Neural Style Transfer

Neural Style Transfer (NST) คือการสร้างรูปภาพที่มีข้อมูลเหมือนกันกับ รูปภาพตั้งต้น (content image) แต่มีสไตล์ของรูปเหมือนกันกับสไตล์ของรูปอีกรูปหนึ่ง (style image)  เช่น

![images/nst.jpeg](images/nst.jpeg)
<center>**รูปที่ 1 ตัวอย่างของ Neural Style Transfer**<br /> (source: http://www.subsubroutine.com/sub-subroutine/2016/11/12/painting-like-van-gogh-with-convolutional-neural-networks)</center>

เทคนิค NST ถูกคิดค้นขึ้นโดย Gatys et al. (2015) https://arxiv.org/abs/1508.06576 

แนวคิดหลักคือ การใช้ loss function ซึ่งประกอบด้วย 3 องค์ประกอบ ได้แก่ "style loss", "content loss", และ "total variation loss":

* total variation loss เป็นฟังก์ชัน ที่นำมาใช้ช่วยสร้างความกลมกลืน (visual coherence) ให้กับรูปภาพที่สร้างขึ้น
* style loss ใช้สำหรับดึง สไตล์จาก style image ออกมาผสมกับ content image ส่วนนี้เป็นส่วนที่นำ convolutional neural network มาใช้ 
* content loss เป็นฟังก์ชัน ที่นำมาใช้สำหรับสกัดเนื้อข้อมูลจาก content image ออกมาผสมกับรูปภาพที่จะสร้างขึ้น

นอกจาก loss functions ทั้งสามส่วน ดังกล่าวข้างต้น เทคนิคอีกอันหนึ่งที่ถูกนำมาประยุกต์ใช้สร้าง NST ก็คือ transfer learning โดยใช้ pretrained convnet ในตัวอย่างที่จะอธิบายต่อไปนี้ จะเป็นการใช้ pretrained model **VGG19** ของ Keras มาใช้เป็นฐานในการสร้าง deep learning model สำหรับ neural style transfer

### Neural Style Transfer ด้วย Keras

#### Import modules

In [1]:
from __future__ import print_function
from keras.preprocessing.image import load_img, img_to_array
from scipy.misc import imsave
import numpy as np
from scipy.optimize import fmin_l_bfgs_b
import time
import argparse

from keras.applications import vgg19
from keras import backend as K

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  (fname, cnt))
  (fname, cnt))


#### Set file paths and other model parameters

In [2]:
# Input/output locations.
# base_image_path  : content image whose size also drives the output size
# style_image_path : image whose style is transferred
# result_prefix    : prefix of the file name used for each saved result
base_image_path  = 'images/kinkakuji.jpg'
style_image_path = 'images/starry.jpg'
result_prefix    = 'results/kinkakuji_starry' 
# number of outer optimisation rounds (one image is saved per round)
iterations = 10

# Multipliers applied to each loss term when they are summed into the
# single scalar loss.
total_variation_weight = 1.0
style_weight = 1.0
content_weight = 0.025

# Dimensions of the generated picture: the height is fixed at 400 rows and
# the width is scaled by the same factor so the base image's aspect ratio
# is kept.
width, height = load_img(base_image_path).size
img_nrows = 400
img_ncols = int(width * img_nrows / height)

#### Load Images and Setup VGG19

In [3]:
def preprocess_image(image_path):
    """Load an image file and turn it into a VGG19-ready batch of one.

    The image is resized to ``(img_nrows, img_ncols)`` while loading,
    converted to an array, given a leading axis of size 1, and finally
    passed through ``vgg19.preprocess_input``.

    Args:
        image_path: path of the image file to load.

    Returns:
        The preprocessed array returned by ``vgg19.preprocess_input``.
    """
    loaded = load_img(image_path, target_size=(img_nrows, img_ncols))
    # add the leading batch axis expected by the network input
    batch = np.expand_dims(img_to_array(loaded), axis=0)
    # format the image dimensions/values for feeding into VGG19
    return vgg19.preprocess_input(batch)


def deprocess_image(x):
    """Turn an image tensor back into a displayable uint8 image.

    The array is reshaped to ``(img_nrows, img_ncols, 3)`` (transposing
    first when the backend uses 'channels_first'), the per-channel mean
    pixel values are added back, the channel order is reversed
    ('BGR' -> 'RGB'), and the result is clipped to [0, 255].

    Note: the mean values are added in place on the reshaped array, so the
    caller's buffer may be modified; pass a copy if that matters.

    Args:
        x: array holding ``img_nrows * img_ncols * 3`` values.

    Returns:
        A ``uint8`` array of shape ``(img_nrows, img_ncols, 3)``.
    """
    channels_first = K.image_data_format() == 'channels_first'
    if channels_first:
        image = x.reshape((3, img_nrows, img_ncols)).transpose((1, 2, 0))
    else:
        image = x.reshape((img_nrows, img_ncols, 3))

    # undo the zero-centering by adding the mean pixel back, channel by channel
    for channel, mean_pixel in enumerate((103.939, 116.779, 123.68)):
        image[:, :, channel] += mean_pixel

    # reverse the channel axis: 'BGR' -> 'RGB'
    rgb = image[:, :, ::-1]
    return np.clip(rgb, 0, 255).astype('uint8')


In [4]:
# backend variable holding the preprocessed content (base) image
base_image = K.variable(preprocess_image(base_image_path))

# backend variable holding the preprocessed style image
style_image = K.variable(preprocess_image(style_image_path))

# placeholder for the generated image; its layout follows the backend's
# image data format
combination_image = K.placeholder(
    (1, 3, img_nrows, img_ncols)
    if K.image_data_format() == 'channels_first'
    else (1, img_nrows, img_ncols, 3))

# stack the three images along the batch axis:
# index 0 = base, index 1 = style, index 2 = generated
input_tensor = K.concatenate(
    [base_image, style_image, combination_image], axis=0)

# pretrained VGG19 (ImageNet weights, no classifier head) fed with the
# stacked tensor
model = vgg19.VGG19(input_tensor=input_tensor,
                    weights='imagenet',
                    include_top=False)

print('VGG19 model loaded.')

# lookup table: layer name => symbolic output of that layer
outputs_dict = {layer.name: layer.output for layer in model.layers}

VGG19 model loaded.


#### Define Loss Functions

In [5]:
def gram_matrix(x):
    """Return the Gram matrix of a rank-3 feature tensor.

    For any data format other than 'channels_first' the axes are first
    permuted with ``(2, 0, 1)``. The tensor is then batch-flattened and
    multiplied by its own transpose.

    Args:
        x: backend tensor with exactly 3 dimensions.

    Returns:
        The backend tensor ``features . features^T``.
    """
    assert K.ndim(x) == 3
    if K.image_data_format() != 'channels_first':
        x = K.permute_dimensions(x, (2, 0, 1))
    features = K.batch_flatten(x)
    return K.dot(features, K.transpose(features))

def style_loss(style, combination):
    """Style loss between the style image and the generated image.

    It keeps the style of the reference image in the generated image by
    comparing the Gram matrices of their feature maps: the squared
    differences are summed and divided by
    ``4 * channels**2 * size**2`` with ``channels = 3`` and
    ``size = img_nrows * img_ncols``.

    Args:
        style: rank-3 feature tensor of the style image.
        combination: rank-3 feature tensor of the generated image.

    Returns:
        A scalar backend tensor.
    """
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3
    gram_difference = gram_matrix(style) - gram_matrix(combination)
    channels = 3
    size = img_nrows * img_ncols
    normalizer = 4. * (channels ** 2) * (size ** 2)
    return K.sum(K.square(gram_difference)) / normalizer

def content_loss(base, combination):
    """Content loss: sum of squared differences between two feature tensors.

    It is meant to keep the 'content' of the base image in the generated
    image.

    Args:
        base: feature tensor of the base image.
        combination: feature tensor of the generated image.

    Returns:
        A scalar backend tensor.
    """
    difference = combination - base
    return K.sum(K.square(difference))

def total_variation_loss(x):
    """Total variation loss of a rank-4 image tensor.

    Each pixel (excluding the last row and last column) is compared with
    the pixel one row below and the pixel one column to the right; the two
    squared differences are added, raised to the power 1.25 and summed.

    Args:
        x: backend tensor with exactly 4 dimensions, laid out according to
            the backend's image data format.

    Returns:
        A scalar backend tensor.
    """
    assert K.ndim(x) == 4
    last_row, last_col = img_nrows - 1, img_ncols - 1
    if K.image_data_format() == 'channels_first':
        anchor = x[:, :, :last_row, :last_col]
        shifted_down = x[:, :, 1:, :last_col]
        shifted_right = x[:, :, :last_row, 1:]
    else:
        anchor = x[:, :last_row, :last_col, :]
        shifted_down = x[:, 1:, :last_col, :]
        shifted_right = x[:, :last_row, 1:, :]
    a = K.square(anchor - shifted_down)
    b = K.square(anchor - shifted_right)
    return K.sum(K.pow(a + b, 1.25))

# Combine the three loss terms into a single scalar, starting from zero.
loss = K.variable(0.)

# Content term: compare the 'block5_conv2' activations of the base image
# (batch index 0) with those of the generated image (batch index 2); the
# indices follow the order used when the input tensor was concatenated.
layer_features = outputs_dict['block5_conv2']
base_image_features = layer_features[0, :, :, :]
combination_features = layer_features[2, :, :, :]
loss += content_weight * content_loss(base_image_features, combination_features)

# Style term: accumulated over the first convolution of each of the five
# VGG19 blocks, comparing the style image (batch index 1) with the
# generated image (batch index 2).
feature_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1', 
                  'block4_conv1', 'block5_conv1']
for layer_name in feature_layers:
    layer_features = outputs_dict[layer_name]
    style_features = layer_features[1, :, :, :]
    combination_features = layer_features[2, :, :, :]
    s_loss = style_loss(style_features, combination_features)
    # each layer contributes an equal share of style_weight
    loss += (style_weight / len(feature_layers)) * s_loss

# Total variation term, computed directly on the generated image.
loss += total_variation_weight * total_variation_loss(combination_image)

# Gradients of the total loss with respect to the generated image.
grads = K.gradients(loss, combination_image)

# K.gradients may return a list/tuple or a single tensor; flatten either
# case into one list of outputs: [loss, grad, ...].
outputs = [loss]
if isinstance(grads, (list,tuple)):
    outputs += grads
else:
    outputs.append(grads)
    
# Backend function mapping a generated image to [loss, gradients...].
f_outputs = K.function([combination_image], outputs)

#### Define the Evaluator Class

In [6]:
def eval_loss_and_grads(x):
    """Evaluate the loss and its gradients for one candidate image.

    ``x`` is reshaped to a batch of one image in the backend's data format
    and fed to ``f_outputs``. The first output is the loss; the remaining
    outputs are the gradients, which are flattened into a 1-D ``float64``
    array.

    Args:
        x: array holding ``img_nrows * img_ncols * 3`` values.

    Returns:
        A ``(loss_value, grad_values)`` tuple.
    """
    if K.image_data_format() == 'channels_first':
        batch_shape = (1, 3, img_nrows, img_ncols)
    else:
        batch_shape = (1, img_nrows, img_ncols, 3)

    outs = f_outputs([x.reshape(batch_shape)])
    loss_value, grad_outs = outs[0], outs[1:]

    # a single gradient output is used as is; several are stacked first
    if len(grad_outs) == 1:
        raw_grads = grad_outs[0]
    else:
        raw_grads = np.array(grad_outs)
    return loss_value, raw_grads.flatten().astype('float64')

# the Evaluator class is used for computing loss and gradient during the optimization
class Evaluator(object):
    """Serve loss and gradients to an optimizer through separate callbacks.

    ``eval_loss_and_grads`` yields the loss and the gradients in a single
    call. ``loss`` performs that call and caches the gradients; the
    following ``grads`` call hands the cached gradients back and clears
    the cache, so the two methods must be called alternately
    (``loss`` first).
    """

    def __init__(self):
        # cached results of the most recent loss() call; None when empty
        self.loss_value = None
        # was misspelled ``grads_values``, which left ``grad_values``
        # undefined on a fresh instance
        self.grad_values = None

    def loss(self, x):
        """Compute loss and gradients for ``x``, cache both, return the loss.

        Asserts that no previous result is still waiting to be consumed
        by ``grads``.
        """
        assert self.loss_value is None
        loss_value, grad_values = eval_loss_and_grads(x)
        self.loss_value = loss_value
        self.grad_values = grad_values
        return self.loss_value

    def grads(self, x):
        """Return a copy of the cached gradients and clear the cache.

        ``x`` is not used: the gradients come from the preceding ``loss``
        call. Asserts that such a call has happened.
        """
        assert self.loss_value is not None
        grad_values = np.copy(self.grad_values)
        self.loss_value = None
        self.grad_values = None
        return grad_values


evaluator = Evaluator()

#### Run the optimization over the pixels of the generated image 

In [7]:
# Start the optimisation from the preprocessed base (content) image.
x = preprocess_image(base_image_path)

# Each outer iteration runs L-BFGS on the pixels of the generated image,
# then saves the current result to disk and reports timing.
for i in range(iterations):
    print('Start of iteration', i)
    start_time = time.time()
    # The image is passed as a flat vector; evaluator.loss / evaluator.grads
    # supply the objective and its gradient. maxfun=20 is the limit passed
    # to the optimizer for this round.
    x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(),
                                     fprime=evaluator.grads, maxfun=20)
    print('Current loss value:', min_val)
    # deprocess a copy so the optimizer state in x is left untouched
    img = deprocess_image(x.copy())
    fname = result_prefix + '_at_iteration_%d.png' % i
    # NOTE(review): the warning captured in this notebook's output reports
    # that `imsave` is deprecated in SciPy 1.0.0 and will be removed in
    # 1.2.0, suggesting `imageio.imwrite` instead.
    imsave(fname, img)
    end_time = time.time()
    print('Image saved as', fname)
    print('Iteration %d completed in %ds' % (i, end_time - start_time))

Start of iteration 0
Current loss value: 6074272000.0
Image saved as results/kinkakuji_starry_at_iteration_0.png
Iteration 0 completed in 20s
Start of iteration 1


`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.


Current loss value: 3910710300.0
Image saved as results/kinkakuji_starry_at_iteration_1.png
Iteration 1 completed in 17s
Start of iteration 2
Current loss value: 3230177000.0
Image saved as results/kinkakuji_starry_at_iteration_2.png
Iteration 2 completed in 25s
Start of iteration 3
Current loss value: 2936898800.0
Image saved as results/kinkakuji_starry_at_iteration_3.png
Iteration 3 completed in 22s
Start of iteration 4
Current loss value: 2755356200.0
Image saved as results/kinkakuji_starry_at_iteration_4.png
Iteration 4 completed in 23s
Start of iteration 5
Current loss value: 2641808100.0
Image saved as results/kinkakuji_starry_at_iteration_5.png
Iteration 5 completed in 29s
Start of iteration 6
Current loss value: 2525306400.0
Image saved as results/kinkakuji_starry_at_iteration_6.png
Iteration 6 completed in 29s
Start of iteration 7
Current loss value: 2459997200.0
Image saved as results/kinkakuji_starry_at_iteration_7.png
Iteration 7 completed in 29s
Start of iteration 8
Curren