In [1]:
from keras import backend
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

Using TensorFlow backend.


## import an image into the environment and reshape

In [2]:
# Load the content image, force it to 3-channel RGB (a grayscale, CMYK or RGBA
# file would otherwise break the per-channel processing below), then resize it
# to the 512x512 working resolution.
content_image = Image.open("content_image.jpeg").convert("RGB").resize((512,512))

## import the style image into the environment and reshape

In [3]:
# Load the style image, force it to 3-channel RGB (a grayscale, CMYK or RGBA
# file would otherwise break the per-channel processing below), then resize it
# to the same 512x512 resolution as the content image.
style_image = Image.open("style_image.jpg").convert("RGB").resize((512,512))

## convert the images to numpy arrays for further processing

In [4]:
# Convert each PIL image to a float32 array and prepend a batch axis of
# length 1, so the result has one more leading dimension than the image.
content_array = np.asarray(content_image, dtype="float32")[np.newaxis, ...]
style_array = np.asarray(style_image, dtype="float32")[np.newaxis, ...]

In [5]:
# Sanity check: both arrays should carry the extra leading batch dimension.
content_array.shape, style_array.shape

((1, 512, 512, 3), (1, 512, 512, 3))

### We added an extra leading dimension with the `expand_dims` function so that these images can later be concatenated into a single tensor, which will be the input to a VGG conv net model.

### Following the VGG paper, we subtract from the input images the per-channel mean pixel values of the ImageNet training data. These mean values are published and easy to find online.

### We do this to center the intensities of the image at zero to improve the accuracy and training speed. 

In [6]:
# ImageNet per-channel means used by VGG. They are usually quoted in BGR order
# (103.939, 116.779, 123.68), but at this point the arrays are still in RGB
# order (the channel flip happens in a later cell), so the means are listed
# here as R, G, B to line up with channels 0, 1, 2.
IMAGENET_MEAN_RGB = np.array([123.68, 116.779, 103.939], dtype="float32")

# In-place subtraction; the mean vector broadcasts over the last (channel) axis.
content_array -= IMAGENET_MEAN_RGB
style_array -= IMAGENET_MEAN_RGB


### We convert the channel order from RGB to BGR, which is the order the pre-trained VGG16 weights expect

In [7]:
# Reverse the last (channel) axis of each array, turning RGB into BGR.
# Note: re-running this cell flips the channels back again.
content_array = content_array[..., ::-1]
style_array = style_array[..., ::-1]

### Now let's create our input tensors using the Keras backend (the TensorFlow graph). The backend is explained really well here: http://keras.io/backend/

In [8]:
# Wrap the preprocessed NumPy arrays as backend variables so they can be used
# as fixed inputs when building the graph.
content_variable = backend.variable(content_array)
style_variable = backend.variable(style_array)

### Let's initialize a combination image variable using the backend placeholder function. 

In [9]:
# Placeholder for the generated (combination) image; it has the same shape as
# the content array and is fed with actual pixel values later.
combination_variable = backend.placeholder(shape = content_array.shape)

In [10]:
# Sanity check: all three tensors must share the same shape to be concatenated.
combination_variable.shape, style_variable.shape, content_variable.shape

(TensorShape([1, 512, 512, 3]),
 TensorShape([1, 512, 512, 3]),
 TensorShape([1, 512, 512, 3]))

### Using the concatenate function in backend we will concatenate these three variables which will return a tensor

In [11]:
# Stack the three images along the batch axis. The order matters: index 0 is
# the content image, index 1 the style image, index 2 the combination image.
tensor_variable = backend.concatenate([ content_variable, style_variable, combination_variable], axis = 0)

In [12]:
# Inspect the concatenated tensor (expected batch size of 3).
tensor_variable

<tf.Tensor 'concat:0' shape=(3, 512, 512, 3) dtype=float32>

### Notice how we are concatenating on the first dimension? This way we preserve all the data we have so far. 

### As mentioned above, we will be using a pre-trained model called VGG16, which was initially developed for image classification

#### If you are not familiar with conv nets, I will write another blog post explaining in detail what they are and how they are useful.

#### VGG16 is a deep learning model with 16 layers and is a conv net classifier. Conv nets are believed to be able to perceive and "understand" images. Every conv net classifier also has a few layers that perform the actual classification; these are called fully connected (FC) layers. We don't need these layers for our algorithm, so we will keep only the layers we need.

### We can import the VGG16 model from keras applications 

In [13]:
from keras.applications.vgg16 import VGG16

In [14]:
# Build VGG16 directly on top of the concatenated tensor, using the ImageNet
# weights. include_top=False leaves out the fully connected classifier layers.
vgg_model = VGG16(input_tensor = tensor_variable, weights = "imagenet", include_top = False)

### include_top = False will ignore the fully connected layers and only retrieve the conv net layers.  

In [15]:
# List the layer objects that make up the truncated model.
vgg_model.layers

[<keras.engine.input_layer.InputLayer at 0x7f18fc1b3bd0>,
 <keras.layers.convolutional.Conv2D at 0x7f18fc641950>,
 <keras.layers.convolutional.Conv2D at 0x7f18fc1e4e10>,
 <keras.layers.pooling.MaxPooling2D at 0x7f18fc1b3d90>,
 <keras.layers.convolutional.Conv2D at 0x7f18fc190c50>,
 <keras.layers.convolutional.Conv2D at 0x7f18f82cad10>,
 <keras.layers.pooling.MaxPooling2D at 0x7f18f82d1a50>,
 <keras.layers.convolutional.Conv2D at 0x7f18f82d1b10>,
 <keras.layers.convolutional.Conv2D at 0x7f18f82d5990>,
 <keras.layers.convolutional.Conv2D at 0x7f18f82dac90>,
 <keras.layers.pooling.MaxPooling2D at 0x7f18f82e0d10>,
 <keras.layers.convolutional.Conv2D at 0x7f18f82e0d90>,
 <keras.layers.convolutional.Conv2D at 0x7f18f82e7fd0>,
 <keras.layers.convolutional.Conv2D at 0x7f18f826dd10>,
 <keras.layers.pooling.MaxPooling2D at 0x7f18f8273fd0>,
 <keras.layers.convolutional.Conv2D at 0x7f18f8273f50>,
 <keras.layers.convolutional.Conv2D at 0x7f18f8279ed0>,
 <keras.layers.convolutional.Conv2D at 0x7f18f

In [16]:
# Map each layer's name to its symbolic output tensor so that features can be
# looked up by name (e.g. layers["block2_conv2"]).
layers = {layer.name: layer.output for layer in vgg_model.layers}
layers

{'input_1': <tf.Tensor 'concat:0' shape=(3, 512, 512, 3) dtype=float32>,
 'block1_conv1': <tf.Tensor 'block1_conv1/Relu:0' shape=(3, 512, 512, 64) dtype=float32>,
 'block1_conv2': <tf.Tensor 'block1_conv2/Relu:0' shape=(3, 512, 512, 64) dtype=float32>,
 'block1_pool': <tf.Tensor 'block1_pool/MaxPool:0' shape=(3, 256, 256, 64) dtype=float32>,
 'block2_conv1': <tf.Tensor 'block2_conv1/Relu:0' shape=(3, 256, 256, 128) dtype=float32>,
 'block2_conv2': <tf.Tensor 'block2_conv2/Relu:0' shape=(3, 256, 256, 128) dtype=float32>,
 'block2_pool': <tf.Tensor 'block2_pool/MaxPool:0' shape=(3, 128, 128, 128) dtype=float32>,
 'block3_conv1': <tf.Tensor 'block3_conv1/Relu:0' shape=(3, 128, 128, 256) dtype=float32>,
 'block3_conv2': <tf.Tensor 'block3_conv2/Relu:0' shape=(3, 128, 128, 256) dtype=float32>,
 'block3_conv3': <tf.Tensor 'block3_conv3/Relu:0' shape=(3, 128, 128, 256) dtype=float32>,
 'block3_pool': <tf.Tensor 'block3_pool/MaxPool:0' shape=(3, 64, 64, 256) dtype=float32>,
 'block4_conv1': <t

### Our model and our input is ready. We need to work on how to calculate loss. 

## Loss

#### Since the combination_image is a combination of both content and style images we need to calculate the loss respective to both these images. 

#### We choose the numbers based on the influence we want from each of these images in the combined image. 

In [22]:
# Relative weights of the individual loss terms; larger values give that term
# more influence on the combined image.
content_weight = 0.025
style_weight = 5
# NOTE(review): not used in the cells visible here; presumably applied to a
# total-variation loss term defined later — confirm.
total_variation_weight = 1

#### Initializing a tensor with 0 to store the loss

In [30]:
# Scalar backend variable, initialised to zero, that holds the total loss.
loss = backend.variable(0.)

### Content Loss

Calculate the loss between the combination image and the content image using the Euclidean loss (the sum of squared differences between their features).

We choose the block2_conv2 layer for extracting the content image features, but other layers can be used as well.
Because all three images were concatenated into one tensor, the features of each image can be sliced out of the block2_conv2 tensor stored in the layers dict.

In [31]:
def calculate_content_loss(content, combination):
    """Return the sum of squared element-wise differences between two tensors.

    Args:
        content: feature tensor of the content image.
        combination: feature tensor of the combination image.

    Returns:
        A scalar backend tensor.
    """
    difference = content - combination
    return backend.sum(backend.square(difference))


# Features of all three images at block2_conv2; the leading axis follows the
# concatenation order (0 = content, 2 = combination).
layer_features = layers["block2_conv2"]
print(layer_features)
content_image_features = layer_features[0, :, :, :]
combination_image_features = layer_features[2, :, :, :]
print(content_image_features)

Tensor("block2_conv2/Relu:0", shape=(3, 256, 256, 128), dtype=float32)
Tensor("strided_slice_10:0", shape=(256, 256, 128), dtype=float32)


#### Finally calculating loss

In [33]:
# Add the weighted content loss to the running total.
# NOTE(review): assign_add updates the variable as a side effect instead of
# building a new symbolic expression. If gradients of `loss` with respect to
# the combination image are taken later, `loss = loss + ...` may be what is
# needed here — confirm against the downstream cells. Re-running this cell also
# adds the term a second time.
loss.assign_add(content_weight * calculate_content_loss(content_image_features, combination_image_features))

<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=0.0>

### Style Loss

#### Calculating style loss is a bit tricky but we will get through this. 

#### Calculate the gram matrix

In [34]:
def calculate_gram_matrix(matrix):
    """Return the Gram matrix of a 3-D feature tensor.

    The last axis is moved to the front and the remaining axes are flattened,
    so every row of `features` holds the values of one slice along that last
    axis. The result is the matrix of inner products between those rows.

    Args:
        matrix: 3-D backend tensor; presumably (height, width, channels), as
            obtained by slicing one image out of a layer output — confirm.

    Returns:
        2-D backend tensor of shape (n, n), where n is the size of the last
        axis of `matrix`.
    """
    features = backend.batch_flatten(backend.permute_dimensions(matrix, (2, 0, 1)))
    # backend.dot needs both operands: features . features^T
    return backend.dot(features, backend.transpose(features))

In [None]:
def calculate_style_loss(style, combination, channels=3, size=512 * 512):
    """Return the style loss between two feature tensors.

    The loss is the sum of squared differences between the Gram matrices of
    the two inputs, divided by 4 * channels**2 * size**2.

    Args:
        style: feature tensor of the style image.
        combination: feature tensor of the combination image.
        channels: channel count used in the normalisation (default 3).
        size: height * width used in the normalisation (default 512 * 512,
            the resolution the images are resized to).

    Returns:
        A scalar backend tensor.
    """
    style_gram = calculate_gram_matrix(style)
    combination_gram = calculate_gram_matrix(combination)
    squared_difference = backend.sum(backend.square(style_gram - combination_gram))
    return squared_difference / (4. * (channels ** 2) * (size ** 2))
    