# Main Debug Notebook

#### download weight and libraries


In [1]:
import os

weight_folder = './data/'
lib_folder = './src/'
img_folder = './img/'
if not os.path.exists(weight_folder): 
  os.mkdir(weight_folder)
if not os.path.exists(img_folder): 
  os.mkdir(img_folder)
if not os.path.exists(lib_folder): 
  os.mkdir(lib_folder)

# Download weight
weight_url = 'http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.mat'
weight_path = weight_folder + 'imagenet-vgg-verydeep-19.mat'
if not os.path.exists(weight_path):
  !curl -o $weight_path $weight_url

# Download libraries
libraries = ['vgg.py', 'constants.py', 'neural_network.py', 'utils.py']
library_url = 'https://raw.githubusercontent.com/abx67/AdvML-style-transfer/master/src/'

for lib in libraries:
  lib_path = lib_folder + lib
  lib_url = library_url + lib
  if not os.path.exists(lib_path):
    !curl -o $lib_path $lib_url
    
# Download images
style_img_name = 'van_gogh.jpg'
content_img_name = 'new_york.jpg'
img_url = 'https://raw.githubusercontent.com/abx67/AdvML-style-transfer/master/img/'

style_path = img_folder + style_img_name
style_url = img_url + style_img_name
content_path = img_folder + content_img_name
content_url = img_url + content_img_name

if not os.path.exists(style_path):
  !curl -o $style_path $style_url
if not os.path.exists(content_path):
  !curl -o $content_path $content_url

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  510M  100  510M    0     0  20.6M      0  0:00:24  0:00:24 --:--:-- 21.9M
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  2470  100  2470    0     0   4891      0 --:--:-- --:--:-- --:--:--  4891
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   779  100   779    0     0   1719      0 --:--:-- --:--:-- --:--:--  1715
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  7101  100  7101    0     0  39232      0 --:--:-- --:--:-- --:--:-- 39232
  % Total    % Received % Xferd  Average Speed   Tim

#### import modules

In [0]:
import time
import numpy as np
import PIL.Image
import sys
sys.path.append(lib_folder)
# from neural_network import NeuralNetwork
from vgg import VGG

#### contents definition

In [0]:
VGG_MAT_PATH = weight_path
CONTENT_IMAGE_PATH = content_path
STYLE_IMAGE_PATH = style_path
MIXED_IMAGE_PATH = './output/'
if not os.path.exists(MIXED_IMAGE_PATH): 
  os.mkdir(MIXED_IMAGE_PATH)

CONTENT_WEIGHT = 5
STYLE_WEIGHT = 50000
VARIATION_WEIGHT = 10000
LEARNING_RATE = 10
BETA1 = 0.9
BETA2 = 0.999
EPSILON = 1e-08
MAX_ITERATION = 1000
POOLING = 'avg'
CHECK_POINT = 50

#### function definition

In [0]:
def load_image(file_path, max_size=None, shape=None):
    # load image and define the factor used to tranfer the image size
    image = PIL.Image.open(file_path)
    # resize by max_size
    if max_size is not None:
        factor = float(max_size) / np.max(image.size)  # image.size = [height, width, 3]
        size = np.array(image.size) * factor
        size = size.astype(int)
        image = image.resize(size, PIL.Image.LANCZOS)  # image resize with filter LANCZOS
    # resize with shape
    if shape is not None:
        image = image.resize(shape, PIL.Image.LANCZOS)
    # return image values with float data type
    return np.float32(image)


def save_image(file_path, image):
    # ensure the pixel value is int between 0 and 255
    image = np.clip(image, 0.0, 255.0).astype(np.uint8)
    # write to file
    PIL.Image.fromarray(image).save(file_path)
    return


def style_transfer(content_image_path, style_image_path, mixed_image_path,
                   content_weight, style_weight, variation_weight,
                   pooling, learning_rate, beta1, beta2, epsilon, max_iteration, check_point):
    # set the time point
    time_start = time.time()

    # load image
    content_image = load_image(content_image_path)
    style_image = load_image(style_image_path, shape=content_image.shape[:2])

    # initialize object
    vgg = VGG(VGG_MAT_PATH, pooling)
    nn = NeuralNetwork(content_image, style_image, vgg, content_weight, style_weight, variation_weight)

    # train the model
    for i, mixed_image in nn.train_model(learning_rate, beta1, beta2, epsilon, max_iteration, check_point):
        save_image(mixed_image_path + 'v1_{}.jpeg'.format(i + 1), mixed_image)

    # print time
    time_end = time.time()
    print('Time elapsed: {} seconds'.format(round(time_end - time_start)))

    return

In [0]:
import numpy as np
import tensorflow as tf
import os
from functools import reduce

PATH = '../'

CONTENT_LAYER_WEIGHTS = {
    'relu4_2': 1.0,
}

STYLE_LAYER_WEIGHTS = {
    'relu1_1': 0.2,
    'relu2_1': 0.2,
    'relu3_1': 0.2,
    'relu4_1': 0.2,
    'relu5_1': 0.2,
}


class NeuralNetwork(object):
    # neural network used for style transfer, which includes the definition of loss function, optimization function, etc.
    def __init__(self, content, style, vgg, content_weight, style_weight, variation_weight):
        # content - image, shape = (height, width, 3)
        # style - image, shape = (height, width, 3)
        # vgg - vgg object, definition see vgg.py
        # content_weight - scalar, weight for the loss of the content image
        # style_weight - scalar, weight for the loss of the style image
        # variation_weight - scalar, weight for the loss of variation of the mixed image

        self.content = content
        self.style = style
        self.vgg = vgg

        self.content_weight = content_weight
        self.style_weight = style_weight
        self.variation_weight = variation_weight

        self.content_shape = (1,) + self.content.shape
        self.style_shape = (1,) + self.style.shape

        self.content_layer_weights = CONTENT_LAYER_WEIGHTS
        self.style_layer_weights = STYLE_LAYER_WEIGHTS

        self.content_features = self.get_content_features()
        self.style_features = self.get_style_features()

        return

    def get_content_features(self):
        content_features = {}
        graph = tf.Graph()
        with graph.as_default(), graph.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=self.content_shape)
            net = self.vgg.load_net(image)
            content = np.array(self.content - self.vgg.mean_pix)  # de-mean
            content = np.reshape(content, (1,) + content.shape)
            for layer_name in self.content_layer_weights:
                content_features[layer_name] = net[layer_name].eval(feed_dict={image: content})
        print(content_features['relu4_2'].shape)
        return content_features

    def get_style_features(self):
        style_features = {}
        graph = tf.Graph()
        with graph.as_default(), graph.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=self.style_shape)
            net = self.vgg.load_net(image)
            style = np.array(self.style - self.vgg.mean_pix)  # de-mean
            style = np.reshape(style, (1,) + style.shape)
            for layer_name in self.style_layer_weights:
                features = net[layer_name].eval(feed_dict={image: style})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = features.T.dot(features) / features.size  # TODO: find out why divide by the size
                style_features[layer_name] = gram
        return style_features

    def train_model(self, learning_rate, beta1, beta2, epsilon, max_iteration, check_point, init_image='content'):
        with tf.Graph().as_default():
            # initial image with random guess
            noise = np.random.normal(size=self.content_shape, scale=np.std(self.content) * 0.1)  # useless
            if init_image == 'random':
                init_image = tf.random_normal(self.content_shape)
            elif init_image == 'content':
                init_image = np.reshape(np.array(self.content - self.vgg.mean_pix), self.content_shape)
            mixed_image = tf.Variable(init_image, dtype=tf.float32)
            mixed_net = self.vgg.load_net(mixed_image)

            # calculate loss
            loss_content = self.calculate_loss_content(mixed_net)
            loss_style = self.calculate_loss_style(mixed_net)
            loss_variation = self.calculate_loss_variation(mixed_image)
            loss_total = loss_content + loss_style + loss_variation

            # summary statistics
            tf.summary.scalar('loss_content', loss_content)
            tf.summary.scalar('loss_style', loss_style)
            tf.summary.scalar('loss_variation', loss_variation)
            tf.summary.scalar('loss_total', loss_total)
            summary_loss = tf.summary.merge_all()

            # initialize optimization
            train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss_total)

            with tf.Session() as sess:
                summary_writer = tf.summary.FileWriter(PATH + 'logs', sess.graph)
                sess.run(tf.global_variables_initializer())

                for i in range(max_iteration):
                    train_step.run()
                    summary = sess.run(summary_loss)
                    summary_writer.add_summary(summary, i)
                    # save image
                    if (check_point and ((i + 1) % check_point) == 0) or i == max_iteration - 1:
                        image_out = mixed_image.eval()
                        image_out = image_out.reshape(self.content_shape[1:]) + self.vgg.mean_pix
                        print('iter: {}, loss total: {}, loss content: {}, loss style: {}, loss variation: {}'.format(
                            i + 1, loss_total.eval(), loss_content.eval(), loss_style.eval(), loss_variation.eval()
                        ))
                        yield i, image_out
            return mixed_image

    def calculate_loss_content(self, mixed_net):
        losses = []
        for layer_name in self.content_layer_weights:
            losses += [self.content_layer_weights[layer_name] * 2 * tf.nn.l2_loss(
                mixed_net[layer_name] - self.content_features[layer_name]) / self.content_features[
                           layer_name].size]  # TODO: find out why divide by the size
        return self.content_weight * reduce(tf.add, losses)

    def calculate_loss_style(self, mixed_net):
        losses = []
        for layer_name in self.style_layer_weights:
            _, height, width, channel = mixed_net[layer_name].get_shape()
            size = height.value * width.value * channel.value
            mixed_features = tf.reshape(mixed_net[layer_name], (-1, channel.value))
            mixed_gram = tf.matmul(tf.transpose(mixed_features),
                                   mixed_features) / size  # TODO: find out why divide by the size
            losses += [self.style_layer_weights[layer_name] * 2 * tf.nn.l2_loss(
                mixed_gram - self.style_features[
                    layer_name]) / self.style_features[layer_name].size]  # TODO: find out why divide by the size
        return self.style_weight * reduce(tf.add, losses)

    def calculate_loss_variation(self, mixed_image):
        height_size = np.prod([dim.value for dim in mixed_image[:, 1:, :, :].get_shape()])
        width_size = np.prod([dim.value for dim in mixed_image[:, :, 1:, :].get_shape()])
        
        loss = 2 * (tf.nn.l2_loss(
            mixed_image[:, 1:, :, :] - mixed_image[:, :mixed_image.shape[1] - 1, :, :]) / height_size + tf.nn.l2_loss(
            mixed_image[:, :, 1:, :] - mixed_image[:, :, :mixed_image.shape[2] - 1, :]) / width_size)
        #loss_poisson = tf.nn.log_poisson_loss(
        height_diff = mixed_image[:,1:, :, :]-mixed_image[:,:mixed_image.shape[1]-1, :, :]
        weight_diff = mixed_image[:, :, 1:, :]-mixed_image[:, :, :mixed_image.shape[2]-1, :]
        ## Threshold for variation loss detection
        threshold = 100.0
        height_diff_k = tf.less(tf.math.abs(height_diff), tf.constant(threshold))
        height_diff_k = tf.boolean_mask(height_diff, height_diff_k)
        weight_diff_k = tf.less(tf.math.abs(weight_diff), tf.constant(threshold))
        weight_diff_k = tf.boolean_mask(weight_diff, weight_diff_k)

        #loss = 2 * (tf.nn.l2_loss(height_diff_k) / height_size + tf.nn.l2_loss(weight_diff_k) / width_size)
        
        
        h = 0.05
        height_kernel = tf.math.exp(-1*tf.math.abs(height_diff))/h
        weight_kernel = tf.math.exp(-1*tf.math.abs(weight_diff))/h
        #loss = 2 * (tf.nn.l2_loss(height_kernel) / height_size + tf.nn.l2_loss(weight_kernel) / width_size)
        return self.variation_weight * loss
      
    #def kernel_loss(self, mixed_image， threshold):
     #   diff =   (mixed_image[:, 1:, :, :] - mixed_image[:, :mixed_image.shape[1] - 1, :, :]) 


In [6]:
#TEST CODE for Variation LOSS
content_image = load_image(CONTENT_IMAGE_PATH)
style_image = load_image(STYLE_IMAGE_PATH, shape=content_image.shape[:2])
vgg = VGG(VGG_MAT_PATH, pooling = "avg")
nn1 = NeuralNetwork(content_image, style_image, vgg, 5, 5000, 1000)
#mixed_image = tf.Variable(init_image, dtype=tf.float32)

(1, 95, 71, 512)


In [0]:

init_image = np.reshape(np.array(content_image - vgg.mean_pix), content_image.shape)
mixed_image = tf.Variable(init_image, dtype=tf.float32)
init = tf.global_variables_initializer()


height_diff = mixed_image[1:, :, :]-mixed_image[:mixed_image.shape[0]-1, :, :]
weight_diff = mixed_image[:, 1:, :]-mixed_image[:, :mixed_image.shape[1]-1, :]
## Threshold for variation loss detection
threshold = 100.0
height_diff_k = tf.less(tf.math.abs(height_diff), tf.constant(threshold))
height_diff_k = tf.boolean_mask(height_diff, height_diff_k)
weight_diff_k = tf.less(tf.math.abs(weight_diff), tf.constant(threshold))
weight_diff_k = tf.boolean_mask(weight_diff, weight_diff_k)

with tf.Session() as sess:
  sess.run(init)
  print(sess.run(height_diff))
  print(sess.run(height_diff_k))
  print(sess.run(tf.shape(mixed_image)))

In [11]:
# h is the parameter 
h = 0.05
height_kernel = tf.math.exp(-1*tf.math.abs(height_diff))/h

with tf.Session() as sess:
  sess.run(init)
  print(sess.run(height_kernel))

[[[8.2798749e-07 8.2798749e-07 8.2798749e-07]
  [3.0459958e-07 3.0459958e-07 3.0459958e-07]
  [3.0459958e-07 3.0459958e-07 3.0459958e-07]
  ...
  [3.0459958e-07 3.0459958e-07 3.0459958e-07]
  [1.1205592e-07 1.1205592e-07 1.1205592e-07]
  [1.5165121e-08 1.1205592e-07 1.1205635e-07]]

 [[4.9575042e-02 6.7092525e-03 1.3475893e-01]
  [4.9575042e-02 6.7092525e-03 1.8237639e-02]
  [1.8237639e-02 1.8237639e-02 1.8237639e-02]
  ...
  [1.8237639e-02 2.4681960e-03 6.7092525e-03]
  [1.8237639e-02 2.4681960e-03 6.7092525e-03]
  [1.8237639e-02 2.4681960e-03 6.7092525e-03]]

 [[2.7067056e+00 2.7067056e+00 2.7067056e+00]
  [2.7067056e+00 2.7067056e+00 3.6631280e-01]
  [2.7067056e+00 2.7067056e+00 3.6631280e-01]
  ...
  [2.7067056e+00 7.3575888e+00 2.0000000e+01]
  [2.0000000e+01 2.0000000e+01 2.0000000e+01]
  [2.0000000e+01 2.0000000e+01 2.0000000e+01]]

 ...

 [[1.8237639e-02 1.8237639e-02 1.8237639e-02]
  [1.3475893e-01 1.3475893e-01 1.3475893e-01]
  [9.9574137e-01 9.9574137e-01 9.9574137e-01]
  ..

#### main function

In [18]:
if __name__ == '__main__':
    style_transfer(content_image_path=CONTENT_IMAGE_PATH,
                   style_image_path=STYLE_IMAGE_PATH,
                   mixed_image_path=MIXED_IMAGE_PATH,
                   content_weight=CONTENT_WEIGHT,
                   style_weight=STYLE_WEIGHT,
                   variation_weight=VARIATION_WEIGHT,
                   pooling=POOLING,
                   learning_rate=LEARNING_RATE,
                   beta1=BETA1,
                   beta2=BETA2,
                   epsilon=EPSILON,
                   max_iteration=MAX_ITERATION,
                   check_point=CHECK_POINT)

(1, 95, 71, 512)
iter: 50, loss total: 12251283.0, loss content: 415448.46875, loss style: 2207239.25, loss variation: 9628595.0
iter: 100, loss total: 10431272.0, loss content: 410810.0625, loss style: 1582027.375, loss variation: 8438435.0
iter: 150, loss total: 9972294.0, loss content: 406106.875, loss style: 1393345.5, loss variation: 8172842.0
iter: 200, loss total: 9734897.0, loss content: 403270.28125, loss style: 1308593.375, loss variation: 8023033.0
iter: 250, loss total: 9590807.0, loss content: 400963.6875, loss style: 1267129.625, loss variation: 7922713.5
iter: 300, loss total: 9489703.0, loss content: 400169.78125, loss style: 1246756.875, loss variation: 7842776.0
iter: 350, loss total: 9410239.0, loss content: 399445.53125, loss style: 1235977.75, loss variation: 7774815.5
iter: 400, loss total: 9378101.0, loss content: 397331.0, loss style: 1252562.875, loss variation: 7728207.0
iter: 450, loss total: 9391082.0, loss content: 398946.5625, loss style: 1255935.0, loss v