In [1]:
from __future__ import print_function

import numpy as np
import scipy.misc
import scipy.io
import tensorflow as tf

from PIL import Image

import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
%matplotlib inline

Using TensorFlow backend.


In [2]:
STYLE_LAYERS = [
    ('conv1_1', .5),
#     ('conv1_2', .2),
    ('conv2_1', 1.),
#     ('conv2_2', .2),
    ('conv3_1', 1.5),
#     ('conv3_2', 0.2),
#     ('conv3_3', .2),
#     ('conv3_4', 0.2),
    ('conv4_1', 3.),
#     ('conv4_2', 0.2),
#     ('conv4_3', 0.2),
#     ('conv4_4', .2),
    ('conv5_1', 4.),
#     ('conv5_2', 0.2),
#     ('conv5_3', 0.2),
#     ('conv5_4', .2)
]

In [3]:
class CONFIG:
    IMAGE_WIDTH = 256
    IMAGE_HEIGHT = 256
    COLOR_CHANNELS = 3
    CONTENT_WEIGHT = 5
    STYLE_WEIGHT = 100
    TOTAL_VARIATION_WEIGHT = 1.
    NOISE_RATIO = .6
    MEANS = np.array([123.68, 116.779, 103.939]).reshape((1,1,1,3))

In [4]:
def reshape_and_normalize_image(image_path):
    """
    Reshape and normalize the input image (content or style)
    """
    
    image = Image.open(image_path)
    image = image.resize((CONFIG.IMAGE_WIDTH, CONFIG.IMAGE_HEIGHT))
    
    image_array = np.asarray(image, dtype='float32')
    image = np.expand_dims(image_array, axis=0)
    
    # Substract the mean to match the expected input of VGG16
    image = image - CONFIG.MEANS
    
    return image

In [5]:
def generate_noise_image(content_image, noise_ratio = CONFIG.NOISE_RATIO):
    """
    Generates a noisy image by adding random noise to the content_image
    """
    
    # Generate a random noise_image
    noise_image = np.random.uniform(-20, 20, (1, CONFIG.IMAGE_HEIGHT, CONFIG.IMAGE_WIDTH,
                                              CONFIG.COLOR_CHANNELS)).astype('float32')
    
    # Set the input_image to be a weighted average of the content_image and a noise_image
    input_image = noise_image * noise_ratio + content_image * (1 - noise_ratio)
    
    return input_image

In [6]:
def compute_content_loss(content, generated):
    m, n_H, n_W, n_C = generated.get_shape().as_list()

    content = tf.reshape(content, [-1])
    generated = tf.reshape(generated, [-1])

    return tf.divide(tf.reduce_sum(tf.square( \
                    tf.subtract(content, generated))), (4*n_H*n_W*n_C))

In [7]:
def gram_matrix(x):
    gram = tf.matmul(x, x, transpose_a=True)
    return gram

def style_layer_loss(style, generated):
    m, n_H, n_W, n_C = generated.get_shape().as_list()
    
    style = tf.reshape(style, (n_H*n_W, n_C))
    generated = tf.reshape(generated, (n_H*n_W, n_C))
    
    S = gram_matrix(style)
    G = gram_matrix(generated)

    channels = n_C
    size = n_H*n_W

    return tf.divide(tf.reduce_sum(tf.square(tf.subtract(S, G))),
                     (4. * (channels ** 2) * (size ** 2)))

In [8]:
# Get content_image from file
content_image_path = "./images/louvre_small.jpg"
content_image = reshape_and_normalize_image(content_image_path)

image_to_show = plt.imread(content_image_path)
# plt.imshow(image_to_show)

In [9]:
style_image_path = "./images/monet.jpg"
style_image = reshape_and_normalize_image(style_image_path)

image_to_show = plt.imread(content_image_path)
# plt.imshow(image_to_show)

In [10]:
# Generate a random noise_image
generated_image = generate_noise_image(content_image)

In [11]:
# Reset the graph
tf.reset_default_graph()

# Start interactive session
sess = tf.InteractiveSession()
# K.set_session(sess)

In [12]:
# Combined input for model
# input_tensor = K.concatenate([content_image,
#                           style_image,
#                           generated_image], axis=0)

# input_tensor = tf.Variable(np.zeros((1,
#                                      CONFIG.IMAGE_HEIGHT,
#                                      CONFIG.IMAGE_WIDTH,
#                                      CONFIG.COLOR_CHANNELS)),
#                            dtype='float32', name="input_tensor")

# input_tensor = Input(tensor=input_tensor)
# model = vgg19.VGG19(weights=None,
#                     input_tensor=input_tensor,
#                     include_top=False)
# model.load_weights("vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5")

# model.summary()

In [13]:
def load_vgg_model(path):
    """
    Returns a model for the purpose of 'painting' the picture.
    Takes only the convolution layer weights and wrap using the TensorFlow
    Conv2d, Relu and AveragePooling layer. VGG actually uses maxpool but
    the paper indicates that using AveragePooling yields better results.
    The last few fully connected layers are not used.
    Here is the detailed configuration of the VGG model:
        0 is conv1_1 (3, 3, 3, 64)
        1 is relu
        2 is conv1_2 (3, 3, 64, 64)
        3 is relu    
        4 is maxpool
        5 is conv2_1 (3, 3, 64, 128)
        6 is relu
        7 is conv2_2 (3, 3, 128, 128)
        8 is relu
        9 is maxpool
        10 is conv3_1 (3, 3, 128, 256)
        11 is relu
        12 is conv3_2 (3, 3, 256, 256)
        13 is relu
        14 is conv3_3 (3, 3, 256, 256)
        15 is relu
        16 is conv3_4 (3, 3, 256, 256)
        17 is relu
        18 is maxpool
        19 is conv4_1 (3, 3, 256, 512)
        20 is relu
        21 is conv4_2 (3, 3, 512, 512)
        22 is relu
        23 is conv4_3 (3, 3, 512, 512)
        24 is relu
        25 is conv4_4 (3, 3, 512, 512)
        26 is relu
        27 is maxpool
        28 is conv5_1 (3, 3, 512, 512)
        29 is relu
        30 is conv5_2 (3, 3, 512, 512)
        31 is relu
        32 is conv5_3 (3, 3, 512, 512)
        33 is relu
        34 is conv5_4 (3, 3, 512, 512)
        35 is relu
        36 is maxpool
        37 is fullyconnected (7, 7, 512, 4096)
        38 is relu
        39 is fullyconnected (1, 1, 4096, 4096)
        40 is relu
        41 is fullyconnected (1, 1, 4096, 1000)
        42 is softmax
    """
    
    vgg = scipy.io.loadmat(path)

    vgg_layers = vgg['layers']
    
    def _weights(layer, expected_layer_name):
        """
        Return the weights and bias from the VGG model for a given layer.
        """
        wb = vgg_layers[0][layer][0][0][2]
        W = wb[0][0]
        b = wb[0][1]
        layer_name = vgg_layers[0][layer][0][0][0][0]
        assert layer_name == expected_layer_name
        return W, b

        return W, b

    def _relu(conv2d_layer):
        """
        Return the RELU function wrapped over a TensorFlow layer. Expects a
        Conv2d layer input.
        """
        return tf.nn.relu(conv2d_layer)

    def _conv2d(prev_layer, layer, layer_name):
        """
        Return the Conv2D layer using the weights, biases from the VGG
        model at 'layer'.
        """
        W, b = _weights(layer, layer_name)
        W = tf.constant(W)
        b = tf.constant(np.reshape(b, (b.size)))
        return tf.nn.conv2d(prev_layer, filter=W, strides=[1, 1, 1, 1], padding='SAME') + b

    def _conv2d_relu(prev_layer, layer, layer_name):
        """
        Return the Conv2D + RELU layer using the weights, biases from the VGG
        model at 'layer'.
        """
        return _relu(_conv2d(prev_layer, layer, layer_name))

    def _avgpool(prev_layer):
        """
        Return the AveragePooling layer.
        """
        return tf.nn.avg_pool(prev_layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Constructs the graph model.
    graph = {}
    graph['input']   = tf.Variable(np.zeros((1, CONFIG.IMAGE_HEIGHT, CONFIG.IMAGE_WIDTH, CONFIG.COLOR_CHANNELS)), dtype = 'float32')
    graph['conv1_1']  = _conv2d_relu(graph['input'], 0, 'conv1_1')
    graph['conv1_2']  = _conv2d_relu(graph['conv1_1'], 2, 'conv1_2')
    graph['avgpool1'] = _avgpool(graph['conv1_2'])
    graph['conv2_1']  = _conv2d_relu(graph['avgpool1'], 5, 'conv2_1')
    graph['conv2_2']  = _conv2d_relu(graph['conv2_1'], 7, 'conv2_2')
    graph['avgpool2'] = _avgpool(graph['conv2_2'])
    graph['conv3_1']  = _conv2d_relu(graph['avgpool2'], 10, 'conv3_1')
    graph['conv3_2']  = _conv2d_relu(graph['conv3_1'], 12, 'conv3_2')
    graph['conv3_3']  = _conv2d_relu(graph['conv3_2'], 14, 'conv3_3')
    graph['conv3_4']  = _conv2d_relu(graph['conv3_3'], 16, 'conv3_4')
    graph['avgpool3'] = _avgpool(graph['conv3_4'])
    graph['conv4_1']  = _conv2d_relu(graph['avgpool3'], 19, 'conv4_1')
    graph['conv4_2']  = _conv2d_relu(graph['conv4_1'], 21, 'conv4_2')
    graph['conv4_3']  = _conv2d_relu(graph['conv4_2'], 23, 'conv4_3')
    graph['conv4_4']  = _conv2d_relu(graph['conv4_3'], 25, 'conv4_4')
    graph['avgpool4'] = _avgpool(graph['conv4_4'])
    graph['conv5_1']  = _conv2d_relu(graph['avgpool4'], 28, 'conv5_1')
    graph['conv5_2']  = _conv2d_relu(graph['conv5_1'], 30, 'conv5_2')
    graph['conv5_3']  = _conv2d_relu(graph['conv5_2'], 32, 'conv5_3')
    graph['conv5_4']  = _conv2d_relu(graph['conv5_3'], 34, 'conv5_4')
    graph['avgpool5'] = _avgpool(graph['conv5_4'])
    
    return graph

In [14]:
model = load_vgg_model("./imagenet-vgg-verydeep-19.mat")

In [23]:
def total_loss(sess, alpha=10, beta=40):

    # Assign the input of the model to be the "content" image 
    sess.run(model["input"].assign(content_image))

    # Get content loss from output of block 4, layer 2
    out = model["conv4_2"]
    content_features = sess.run(out)
    generated_features = out
    content_loss = compute_content_loss(content_features, generated_features)
    
    # loss for style image
    style_loss = 0.
    
    # Assign the input of the model to be the "style" image 
    sess.run(model["input"].assign(style_image))
    
    for layer_name, coeff in STYLE_LAYERS:
        # Select the output tensor of the currently selected layer
        out = model[layer_name]
        style_features = sess.run(out)
        generated_features = out
        style_loss += coeff * style_layer_loss(style_features, generated_features)
    
    # Get total loss using alpha and beta
    total_loss = (content_loss*alpha) + (style_loss*beta)
    
    return content_loss, style_loss, total_loss

In [24]:
content_loss, style_loss, J = total_loss(sess)

In [25]:
# define optimizer
optimizer = tf.train.AdamOptimizer(2.0)

# define train_step
train_step = optimizer.minimize(J)

In [26]:
def save_image(path, image):
    
    # Un-normalize the image so that it looks good
    image = image + CONFIG.MEANS
    
    # Clip and Save the image
    image = np.clip(image[0], 0, 255).astype('uint8')
    scipy.misc.imsave(path, image)

In [27]:
def model_nn(sess, input_image, num_iterations = 21):
    
    # Initialize global variables (you need to run the session on the initializer)
    sess.run(tf.global_variables_initializer())
    
    # Assign the input of the model to be the "input" image 
    sess.run(model["input"].assign(input_image))
    
    for i in range(num_iterations):
        
        # Run the session on the train_step to minimize the total cost
        sess.run([train_step])
        
        # Compute the generated image by running the session on the current model['input']
        generated_image = sess.run(model["input"])

        # Print every 20 iteration.
        if i%20 == 0:
            Jt, Jc, Js = sess.run([J, content_loss, style_loss])
            print("Iteration " + str(i) + " :")
            print("content cost = ", Jc)
            print("style cost = ", Js)
            print("total cost = ", Jt)
            
            # save current generated image in the "/output" directory
            save_image("output/" + str(i) + ".png", generated_image)

    # save last generated image
    save_image('output/generated_image.jpg', generated_image)
    
    return generated_image

In [28]:
out = model_nn(sess, generated_image)

# print(generated_image)

Iteration 0 :
content cost =  8496.51
style cost =  2.12962e+09
total cost =  8.5185e+10


`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.
  


Iteration 20 :
content cost =  15803.4
style cost =  3.91199e+08
total cost =  1.56481e+10
