## Setup
### Copyright (c) 2015-2016 Anish Athalye. Released under GPLv3.

In [1]:
# boilerplate code
import os
from io import BytesIO

import numpy as np
import scipy.misc
import scipy.io
import math

import time
import PIL.Image
from IPython.display import clear_output, Image, display
from __future__ import print_function

import tensorflow as tf

try:
    reduce
except NameError:
    from functools import reduce

## VGG Net Import Functions

In [44]:
def run_net(data_path, input_image):
    """Build the VGG feature network on top of `input_image`.

    The pretrained parameters are read from the MatConvNet `.mat` file at
    `data_path` and embedded in the graph as constants.

    :param data_path: path handed to `scipy.io.loadmat`.
    :param input_image: tensor that is fed to the first convolution.
    :return: tuple `(net, mean_pixel)`. `net` maps every layer name to the
        tensor produced by that layer; `mean_pixel` is the file's
        normalization image averaged over its first two axes.
    """
    # The position of a name in this tuple is also the index of the matching
    # entry in the `.mat` file's layer array, so the order must not change.
    layer_names = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2',
        'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2',
        'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2',
        'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4',
    )

    mat = scipy.io.loadmat(data_path)
    mean_image = mat['normalization'][0][0][0]
    mean_pixel = np.mean(mean_image, axis=(0, 1))
    layer_params = mat['layers'][0]

    outputs = {}
    tensor = input_image
    for index in range(len(layer_names)):
        layer_name = layer_names[index]
        prefix = layer_name[:4]
        if prefix == 'pool':
            tensor = _pool_layer(tensor)
        elif prefix == 'relu':
            tensor = tf.nn.relu(tensor)
        elif prefix == 'conv':
            filters, offsets = layer_params[index][0][0][0][0]
            # matconvnet stores kernels as [width, height, in, out] while
            # tensorflow expects [height, width, in, out]: swap the first
            # two axes.
            filters = np.transpose(filters, (1, 0, 2, 3))
            offsets = offsets.reshape(-1)
            tensor = _conv_layer(tensor, filters, offsets)
        outputs[layer_name] = tensor

    # Every layer name must have produced exactly one entry.
    assert len(outputs) == len(layer_names)
    return outputs, mean_pixel


def _conv_layer(input, weights, bias):
    """Apply a stride-1, 'SAME'-padded convolution followed by a bias add.

    :param input: tensor to convolve.
    :param weights: kernel values; wrapped in `tf.constant`.
    :param bias: bias values added to the convolution output.
    :return: the biased convolution output tensor.
    """
    kernel = tf.constant(weights)
    convolved = tf.nn.conv2d(input, kernel, strides=(1, 1, 1, 1),
            padding='SAME')
    return tf.nn.bias_add(convolved, bias)


def _pool_layer(input):
    """Max-pool `input` with a 2x2 window, stride 2 and 'SAME' padding."""
    window = (1, 2, 2, 1)
    return tf.nn.max_pool(input, ksize=window, strides=window,
            padding='SAME')


def preprocess(image, mean_pixel):
    """Return `image` with `mean_pixel` subtracted (inverse of `unprocess`)."""
    centered = image - mean_pixel
    return centered


def unprocess(image, mean_pixel):
    """Return `image` with `mean_pixel` added back (inverse of `preprocess`)."""
    restored = image + mean_pixel
    return restored

## Stylize

In [55]:
# Layer of the network whose activations are compared for the content loss.
CONTENT_LAYER = 'conv4_2'

# Layers of the network whose Gram matrices are compared for the style loss.
STYLE_LAYERS = (
    'conv1_1',
    'conv2_1',
    'conv3_1',
    'conv4_1',
    'conv5_1',
)

def stylize(network, initial, content, styles, iterations,
        content_weight, style_weight, style_blend_weights, tv_weight,
        learning_rate, print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.  The yielded image is the one
    with the lowest loss among the iterations evaluated so far.

    :param network: path to the network `.mat` file, passed to `run_net`.
    :param initial: image array to start the optimization from, or None to
        start from `tf.random_normal(shape) * 0.256`.
    :param content: content image array; its shape fixes the output shape.
    :param styles: sequence of style image arrays.
    :param iterations: number of optimizer steps to run.
    :param content_weight: multiplier of the content loss.
    :param style_weight: multiplier of the style loss.
    :param style_blend_weights: per-style multipliers, indexed like `styles`.
    :param tv_weight: multiplier of the total-variation loss; the term is
        left out of the objective when this is not positive.
    :param learning_rate: learning rate given to the Adam optimizer.
    :param print_iterations: if truthy, the individual losses are printed
        every that many iterations.
    :param checkpoint_iterations: if truthy, an intermediate image is yielded
        every that many iterations.
    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    # compute content features in feedforward mode
    print('Computing forward pass for content features...', end='')
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session():
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = run_net(network, image)
        content_pre = np.array([preprocess(content, mean_pixel)])
        content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                feed_dict={image: content_pre})
    print('Done!')

    # compute style features in feedforward mode
    print('Computing forward pass for style features...', end='')
    for i in range(len(styles)):
        # each style image gets its own graph because its shape may differ
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session():
            image = tf.placeholder('float', shape=style_shapes[i])
            net, _ = run_net(network, image)
            style_pre = np.array([preprocess(styles[i], mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram
    print('Done!')

    # make stylized image using backpropagation
    with tf.Graph().as_default():
        if initial is None:
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([preprocess(initial, mean_pixel)])
            initial = initial.astype('float32')
        image = tf.Variable(initial)
        net, _ = run_net(network, image)

        # content loss
        content_loss = content_weight * (2 * tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
                content_features[CONTENT_LAYER].size)

        # style loss
        # Start from a tensor (not a Python number) so that `.eval()` in
        # print_progress also works when `styles` is empty.
        style_loss = tf.constant(0.0)
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = [
                        dim.value for dim in layer.get_shape()]
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

        # total variation denoising
        if tv_weight > 0:
            tv_y_size = _tensor_size(image[:,1:,:,:])
            tv_x_size = _tensor_size(image[:,:,1:,:])
            tv_loss = tv_weight * 2 * (
                    (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                        tv_y_size) +
                    (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                        tv_x_size))
            # overall loss
            loss = content_loss + style_loss + tv_loss
        else:
            # overall loss
            loss = content_loss + style_loss
            # Keep tv_loss a tensor so print_progress can still call
            # `.eval()` on it when the term is disabled.
            tv_loss = tf.constant(0.0)

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        def print_progress(i):
            print('\rIteration %d/%d' % (i + 1, iterations))
            if print_iterations and i % print_iterations == 0:
                print('  content loss: %g\n' % content_loss.eval())
                print('    style loss: %g\n' % style_loss.eval())
                print('       tv loss: %g\n' % tv_loss.eval())
                print('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(iterations):
                last_step = (i == iterations - 1)
                print_progress(i)
                train_step.run()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    # only replace the kept image when the loss improved
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()
                    yield (
                        (None if last_step else i),
                        unprocess(best.reshape(shape[1:]), mean_pixel)
                    )

def _tensor_size(tensor):
    """Return the product of the dimensions reported by `tensor.get_shape()`.

    A shape with no dimensions yields 1.
    """
    total = 1
    for dim in tensor.get_shape():
        total = total * dim.value
    return total

## Helper Functions

In [40]:
def showarray(a, fmt='jpeg'):
    """Display array `a` inline as an image.

    Values are clipped to [0, 1] and scaled to 8-bit before encoding.

    :param a: image array.
    :param fmt: image format name handed to PIL when encoding.
    """
    pixels = np.uint8(np.clip(a, 0, 1)*255)
    # encode into an in-memory buffer instead of a file on disk
    buffer = BytesIO()
    PIL.Image.fromarray(pixels).save(buffer, fmt)
    encoded = buffer.getvalue()
    display(Image(data=encoded))

# Helper functions that use TF to resize an image
def tffunc(*argtypes):
    '''Decorator factory turning a TF-graph building function into a plain one.

    One placeholder is created per entry of `argtypes`. The decorated
    function is called once with those placeholders to build its output
    tensor; the returned callable feeds its positional arguments into the
    placeholders and evaluates that tensor. An optional `session` keyword
    selects the session used for the evaluation.
    See "resize" function below.
    '''
    placeholders = [tf.placeholder(argtype) for argtype in argtypes]

    def wrap(f):
        out = f(*placeholders)

        def wrapper(*args, **kw):
            feed = dict(zip(placeholders, args))
            return out.eval(feed, session=kw.get('session'))

        return wrapper

    return wrap

@tffunc(np.float32, np.int32)
def resize(img, size):
    """Resize `img` to `size` with bilinear interpolation.

    Through `tffunc`, `img` is fed as float32 and `size` as int32. A leading
    batch axis is added for the resize op and stripped again from the result.
    """
    batched = tf.expand_dims(img, 0)
    resized = tf.image.resize_bilinear(batched, size)
    return resized[0,:,:,:]

def stdize(a, s=0.1):
    '''Normalize the image range for visualization.

    The array is centered on its mean, divided by its standard deviation
    (floored at 1e-4 to avoid dividing by zero), scaled by `s` and shifted
    by 0.5.
    '''
    spread = max(a.std(), 1e-4)
    centered = a - a.mean()
    return centered / spread * s + 0.5

## Single Example

In [76]:
# default arguments
CONTENT_WEIGHT = 5e0
STYLE_WEIGHT = 1e0 #1e2
TV_WEIGHT = 1e2
LEARNING_RATE = 1e1
STYLE_SCALE = 1.0
ITERATIONS = 1000

# set to a falsy value to run the optimization without writing the result
saving = 1
NETWORK_LOC = '/media/data/Dropbox/Git/tf-deepdream/imagenet-vgg-verydeep-19.mat'
# CONTENT_LOC = '/media/data/Dropbox/image-play/source/snow/sierra_in_snow.jpg'
CONTENT_LOC = '/media/data/Dropbox/image-play/source/buildings/hong_kong_0.jpg'
STYLE_LOC = '/media/data/Dropbox/image-play/source/art/starry_night.jpg'
OUTPUT_LOC = '/media/data/Dropbox/image-play/ns-single_image/hong_kong_skyline'

PRINT_ITERATIONS = None
CHECKPOINT_ITERATIONS = None

# Destination of the final image; built once, outside the loop.
file_name = 'hong_kong_1000_s1_c5_starrynight.jpg'
file_path = os.path.join(OUTPUT_LOC, file_name)

# Make sure the output directory exists before the long optimization starts,
# so that a missing directory cannot discard the result at save time.
if saving and not os.path.isdir(OUTPUT_LOC):
    os.makedirs(OUTPUT_LOC)

# import images
# Force RGB: grayscale, palette or RGBA files would otherwise produce arrays
# that do not have exactly three channels.
content_image = np.float32(PIL.Image.open(CONTENT_LOC).convert('RGB'))
style_image = np.float32(PIL.Image.open(STYLE_LOC).convert('RGB'))
target_shape = content_image.shape
# random starting image with the same shape as the content image
img0 = np.random.randn(*target_shape)

start = time.time()
for iteration, image in stylize(
    network=NETWORK_LOC,
    initial=img0,
    content=content_image,
    styles=[style_image],
    iterations=ITERATIONS,
    content_weight=CONTENT_WEIGHT,
    style_weight=STYLE_WEIGHT,
    style_blend_weights=[1],
    tv_weight=TV_WEIGHT,
    learning_rate=LEARNING_RATE,
    print_iterations=PRINT_ITERATIONS,
    checkpoint_iterations=CHECKPOINT_ITERATIONS
):
    # `iteration` is None only for the final image
    if (iteration is None) and saving:
        image = np.uint8(np.clip(image, 0, 255))
        PIL.Image.fromarray(image).save(file_path, 'jpeg')
end = time.time()

print('Total time elapsed: %d seconds' % (end-start))

Computing forward pass for content features...Done!
Computing forward pass for style features...Done!


Iteration 1000/1000

Total time elapsed: 1148 seconds
