# Deep Neural Style Match

Use definition of neural style by Gatys et al. (2015) (https://arxiv.org/abs/1508.06576) and match an unknown picture to an artist's style


In [None]:
import os
import sys
import scipy.io
import scipy.misc
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from PIL import Image
from nst_utils import *
import numpy as np
import tensorflow as tf
import imageio
import random

%matplotlib inline

## 1 - Load VGG

We'll use VGG-19 (arXiv:1409.1556) trained on ImageNet. Download from http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.mat
The load_vgg_model function has some copyright. 

In [None]:
# Load the pretrained VGG-19 model from the downloaded weights file. Later
# cells index the result by layer name (e.g. model['input'], model['conv4_2']).
model = load_vgg_model("pretrained-model/imagenet-vgg-verydeep-19.mat")
# Print the returned object to see which entries it exposes.
print(model)

## 2 - Visualize paintings of a few artists

**Caveat: Need to download some images from the web to make this work** 

Visualize some data. We have images 1 to 6 by chagall, kinkade, matisse, monet, and picasso. 

Images are randomly selected; execute the cell several times to see the full database.

In [None]:
# Show one randomly chosen reference painting.
artists = ["chagall", "kinkade", "matisse", "monet", "picasso"]

# Draw the artist first, then an image number (randint includes both ends).
artist = random.choice(artists)
print(artist)
image_name = '-'.join([artist, str(random.randint(1, 6))])
print(image_name)

# Paintings are read from artist-styles/<artist>-<number>.jpg
image_path = 'artist-styles/' + image_name + '.jpg'
example_image = imageio.imread(image_path)
imshow(example_image)

### 3.2 - Computing the style cost

Create a function which takes a picture and calculates its style. 

### 3.2.1 - Style matrix

The neural style paper by Gatys et al. defines style as the Gram matrix of activations. The Gram matrix collects the inner products (dot products) of the activations: ${\displaystyle G_{ij} = \langle v_i,v_j\rangle = v_{i}^T v_{j} = \text{np.dot}(v_{i}, v_{j})}$, see https://en.wikipedia.org/wiki/Gramian_matrix. 

We use the unrolled activations (i.e. 300 x 300 pixel-activation-matrix -> 90000 element vector) as the $v_i$ and $v_j$. Say one filter detects edges and one filter detects green. The strength of the unnormalized correlation ($C_{ij} = v_{i} \cdot v_{j}$, not the correlation coefficient $c_{ij} = \frac{v_{i} \cdot v_{j}}{|v_i|\cdot|v_j|}$) will tell you that whenever there's an edge, it's green! You just found that the artist likes green edges. Importantly, the diagonal elements of the Gram matrix measure how prevalent a feature is. Say a filter detects horizontal lines, and there are a lot of horizontal lines in the picture: the Gram matrix will show it!
As Andrew puts it: 
> By capturing the prevalence of different types of features ($G_{ii}$), as well as how much different features occur together ($G_{ij}$), the Style matrix $G$ measures the style of an image. 

The gram matrix of A is $G_A = AA^T$.

In [None]:
# Gram Matrix
def gram_matrix(A):
    """
    Compute the Gram matrix G_A = A A^T of a 2-D matrix.

    Argument:
    A -- matrix of shape (n_C, n_H*n_W)

    Returns:
    GA -- Gram matrix of A, of shape (n_C, n_C)
    """
    A_transposed = tf.transpose(A)

    # axes=1 sums over one axis: the last axis of A against the first axis of
    # A^T, i.e. an ordinary matrix product. tf.matmul(A, A, transpose_b=True)
    # is an equivalent way to write this.
    return tf.tensordot(A, A_transposed, axes=1)

In [None]:
# Sanity check of gram_matrix on a small 2x2 example
tf.reset_default_graph()

with tf.Session() as test:
    A = [[1, 2], [3, 4]]
    GA = gram_matrix(A)

    # Evaluate the tensor through the session opened by the with-block.
    print("GA = " + str(test.run(GA)))

**Expected Output**:

<table>
    <tr>
        <td>
            GA = 
        </td>
        <td>
        [[ 5 11]
         [11 25]]
        </td>
    </tr>

</table>

### 3.2.2 Image style - helper functions
So far, we can calculate the Gram matrix (i.e. style) for a given activation of a particular layer. Let's define some helper functions to calculate the style of an image.


In [None]:
def layer_style(a_S):
    """
    Compute the style (Gram matrix) of one layer's activations.

    Arguments:
    a_S -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing style of the image S

    Returns:
    G_S -- Gram matrix of the layer, of shape (n_C, n_C)
    """
    # Static shape as plain Python values; the batch entry is not needed.
    _, n_H, n_W, n_C = a_S.get_shape().as_list()

    # Unroll the spatial grid: one row per pixel position, one column per channel ...
    pixels_by_channel = tf.reshape(a_S, [n_H * n_W, n_C])
    # ... then flip to (n_C, n_H*n_W), the layout gram_matrix expects.
    channels_by_pixel = tf.transpose(pixels_by_channel)

    return gram_matrix(channels_by_pixel)

In [None]:
# Smoke test for layer_style on a random activation
tf.reset_default_graph()

with tf.Session() as test:
    # Seed the graph so the random activation below is reproducible.
    tf.set_random_seed(1)
    # Arbitrary activation of shape (m=1, n_H=4, n_W=4, n_C=3)
    a_S = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)
    # Gram matrix of that activation
    my_style = layer_style(a_S)

    # Evaluate the tensor through the session opened by the with-block.
    print("my_style = " + str(test.run(my_style)))

TODO: What is the expected result of above cell?

In [None]:
# Layers whose style (Gram matrix) we extract, each paired with a weight
# meant for calculating the style cost. All five layers get the same weight.
STYLE_LAYERS = [
    (layer_name, 0.2)
    for layer_name in ('conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1')
]

In [None]:
def compute_style(tf_session, model, STYLE_LAYERS):
    """
    Compute the style of the image the model is currently fed with.

    Arguments:
    tf_session -- session used to evaluate the Gram-matrix tensors
    model -- mapping from layer name to that layer's activation tensor
    STYLE_LAYERS -- sequence of (layer_name, coeff) pairs; coeff is not used here

    Returns:
    GS -- list of numpy arrays, one Gram matrix of shape (n_C, n_C) per entry
          of STYLE_LAYERS, in the same order
    """
    # Build the Gram-matrix tensor of every requested layer first ...
    gram_tensors = [layer_style(model[layer_name]) for layer_name, _ in STYLE_LAYERS]
    if not gram_tensors:
        return []

    # ... then fetch them all in ONE run call. Running each layer separately
    # repeats the forward pass through the network once per layer.
    GS = tf_session.run(gram_tensors)

    # Sanity check: the session must hand back concrete arrays, not tensors.
    assert all(isinstance(GS_i, np.ndarray) for GS_i in GS)

    return GS

### How to calculate an activation in tensorflow

In [None]:
tf.reset_default_graph()

with tf.Session() as sess:

    # Load the model again after the graph reset above.
    model = load_vgg_model("pretrained-model/imagenet-vgg-verydeep-19.mat")

    # Preprocess the painting picked in the visualization cell.
    style_image = reshape_and_normalize_image(example_image)

    # Assign the style image to be the input of the VGG model.
    sess.run(model['input'].assign(style_image))

    # Output tensor of layer conv4_2
    out = model['conv4_2']

    # Evaluate it in the enclosing session to get the concrete activation.
    a_C = out.eval()

    print(a_C.shape)

### 3.2.3 Style of an image
Let's calculate the style of an image

In [None]:
# Start from an empty default graph before loading the model again.
tf.reset_default_graph()

with tf.Session() as sess:
    
    model = load_vgg_model("pretrained-model/imagenet-vgg-verydeep-19.mat")

    # Preprocess the painting picked in the visualization cell.
    style_image = reshape_and_normalize_image(example_image)

    # Assign the style image to be the input of the VGG model.
    sess.run(model['input'].assign(style_image))

    # One Gram matrix per entry of STYLE_LAYERS, evaluated in this session.
    style = compute_style(sess, model,STYLE_LAYERS)
    
    # style is a list of nd arrays; to check, uncomment:
#     for i in style:
#         print(type(i))

### 3.2.4 - Style database
We can capture the style of an image, so let's build a database of the images

In [None]:
# Maps "<artist>-<number>" to that painting's style (list of Gram matrices).
style_db = dict()

tf.reset_default_graph()

model = load_vgg_model("pretrained-model/imagenet-vgg-verydeep-19.mat")

#
# reference styles
#
# artists defined above as artists = ["chagall", "kinkade", "matisse", "monet", "picasso"]
for artist in artists:
    print(artist)
    # Each artist has images 1 to 6 (the visualization cell draws from
    # randint(1, 6)), so the range must stop at 7. range(1, 6) silently left
    # image 6 out of the database.
    for i in range(1, 7):

        with tf.Session() as sess:

            image_name = artist+str('-')+str(i)
            print(image_name)
            style_image = imageio.imread('artist-styles/'+image_name+'.jpg')
            style_image = reshape_and_normalize_image(style_image)

            # Assign the reference painting to be the input of the VGG model.
            sess.run(model['input'].assign(style_image))

            # One Gram matrix per entry of STYLE_LAYERS
            style = compute_style(sess, model,STYLE_LAYERS)

            style_db[image_name] = style

### 3.2.5 - Target style
Having calculated the style of a bunch of paintings, let's calculate the style of the target picture

In [None]:
# Style of the picture we want to attribute to an artist
target_style = []
tf.reset_default_graph()

with tf.Session() as sess:

    model = load_vgg_model("pretrained-model/imagenet-vgg-verydeep-19.mat")

    # The target picture is read from artist-styles/target.jpg
    image_name = 'target'
    print(image_name)
    target_path = 'artist-styles/' + image_name + '.jpg'
    style_image = reshape_and_normalize_image(imageio.imread(target_path))

    # Assign the target picture to be the input of the VGG model.
    sess.run(model['input'].assign(style_image))

    target_style = compute_style(sess, model, STYLE_LAYERS)

    # Show the container type and the type of each per-layer entry.
    print(type(target_style))
    for i in target_style:
        print(type(i))

# Style distance
We calculated the style for our target picture $T$ and a variety of reference paintings $R$. Now we find the picture in the db with the minimal distance to our target style. We use the difference between the Gram matrices $G$. For each layer, the distance is

$$dist^{[l]}(R,T) = \frac{1}{4 \times {n_C}^2 \times (n_H \times n_W)^2} \sum _{i=1}^{n_C}\sum_{j=1}^{n_C}(G^{(R)}_{ij} - G^{(T)}_{ij})^2\tag{2} $$

where $G^{(R)}$ and $G^{(T)}$ are respectively the Gram matrices of the "reference" db image and the "target" image, computed using the hidden layer activations for a particular hidden layer in the network.  


In [None]:
def get_nc_nh_nw_list(model,STYLE_LAYERS):
    """
    Collect the activation dimensions of every style layer.

    The dimensions are read from each tensor's static shape, the same way
    layer_style does. No session is needed, so the function no longer
    depends on a global `sess` that happens to be open.

    Arguments:
    model -- mapping from layer name to that layer's activation tensor
    STYLE_LAYERS -- sequence of (layer_name, coeff) pairs; coeff is not used here

    Returns:
    nc_nh_nw_list -- list of [n_C, n_H, n_W] lists, one per entry of
                     STYLE_LAYERS, in the same order
    """
    nc_nh_nw_list = []
    # loop over the relevant layers
    for layer_name, _ in STYLE_LAYERS:
        print(layer_name)

        # Static shape of the activation, assumed to be (m, n_H, n_W, n_C).
        # NOTE(review): an entry would be None if the graph left a dimension
        # undefined -- layer_style relies on them being defined as well.
        _, n_H, n_W, n_C = model[layer_name].get_shape().as_list()
        nc_nh_nw_list.append([n_C, n_H, n_W])

        print(n_C, n_H, n_W)
    return nc_nh_nw_list
        

In [None]:
# Per-layer activation dimensions, needed to normalize the style distance
nc_nh_nw_list = []

tf.reset_default_graph()

with tf.Session() as sess:

    model = load_vgg_model("pretrained-model/imagenet-vgg-verydeep-19.mat")

    # Our target image, read from artist-styles/target.jpg
    image_name = 'target'
    print(image_name)
    target_path = 'artist-styles/' + image_name + '.jpg'
    style_image = reshape_and_normalize_image(imageio.imread(target_path))

    # Assign the target image to be the input of the VGG model.
    sess.run(model['input'].assign(style_image))

    # Must run while `sess` is open.
    nc_nh_nw_list = get_nc_nh_nw_list(model, STYLE_LAYERS)

In [None]:
def get_dist(ref_list,tar_list,nc_nh_nw_list):
    """
    Style distance between a reference picture and the target picture.

    Arguments
    ref_list -- list of gram matrices of reference picture
    tar_list -- list of gram matrices of target picture
    nc_nh_nw_list -- list of (nc, nh, nw) per layer: nc = number of channels,
                     nh = number of pixels in height, nw = width

    Returns
    dist -- sum over layers of sum((G_ref - G_tar)^2) / (2*nc*nh*nw)^2;
            0 when the lists are empty

    Raises
    ValueError -- if the three lists do not have the same length
    """
    # zip() would silently drop the surplus layers of the longer lists and
    # return a distance over only part of the layers, so check up front.
    if not (len(ref_list) == len(tar_list) == len(nc_nh_nw_list)):
        raise ValueError(
            "ref_list, tar_list and nc_nh_nw_list must have the same length, "
            "got {}, {} and {}".format(len(ref_list), len(tar_list), len(nc_nh_nw_list)))

    dist = 0
    for ref_G_L, tar_G_L, (n_C, n_H, n_W) in zip(ref_list, tar_list, nc_nh_nw_list):
        squared_diff = np.sum(np.square(np.subtract(ref_G_L, tar_G_L)))
        # 1.0 keeps the scale factor a float even where `/` on two ints
        # would be integer division.
        dist += 1.0 / (2 * n_C * n_H * n_W) ** 2 * squared_diff
    return dist

In [None]:
# Linear scan for the reference picture whose style is closest to the target.
min_dist = 10**100  # start value meant to exceed every real distance
min_name = "None"
for key, value in style_db.items():
    dist = get_dist(value, target_style, nc_nh_nw_list)
    print("min_dist {}, dist {}, key {}".format(min_dist, dist, key))
    if not (dist < min_dist):
        continue
    # New best match so far
    min_dist, min_name = dist, key
print(min_name)

In [None]:
# Distance of every reference picture to the target, keyed by picture name
dist_dict = {
    key: get_dist(value, target_style, nc_nh_nw_list)
    for key, value in style_db.items()
}
# Picture names ordered from smallest to largest distance
sorted(dist_dict, key=dist_dict.get)


**Summary**:
We see that the target picture's distance to the kinkades is small. This means the target picture is predicted to be a kinkade by the algorithm. In fact, the target picture is indeed one of Kinkade's paintings. 

**Shortcomings**:
In one shot learning, we would train to optimize for a large difference between invalid pairs (say picasso - monet) and a small difference between valid pairs (chagall - chagall). We don't put in the effort of actually doing this since the implementation already has 100% accuracy on our small test set. 