In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Adjust verbosity to suppress information logs

from six.moves import urllib
from scipy.misc import imsave
from PIL import Image, ImageOps

%matplotlib inline
import matplotlib.pyplot as plt

import time
import scipy.io  # For loading pre-trained VGG's *.mat files using loadmat
import numpy as np
import tensorflow as tf

### Utility Functions

In [2]:
def make_dir(path):
    """Create a directory if directory does not exist."""
    try:
        os.mkdir(path)
    except OSError:
        pass
    
def save_image(path, image):
    """Save image to a specified path."""
    image = image[0]
    image = np.clip(image, a_min=0, a_max=255).astype('uint8')
    imsave(path, image)
    
def generate_noise_image(content_image, width, height, noise_ratio=0.6):
    """Generate a noise image"""
    noise_image = np.random.uniform(low=-20, high=20, 
                                    size=(1, height, width, 3)).astype(np.float32)
    
    return noise_image * noise_ratio + content_image * (1 - noise_ratio)

def download_model(url, file_name, expected_bytes):
    """
    Download the pre-trained VGG-19 model if it is not already downloaded.
    """
    if os.path.exists(file_name):
        print('VGG-19 pre-trained model is ready.')
        return
    print('Downloading the pre-trained VGG-19 model ...')
    file_name, _ = urllib.request.urlretrieve(url, file_name)
    file_stat = os.stat(file_name)
    if file_stat.st_size == expected_bytes:
        print('Successfully downloaded VGG-19 pre-trained model', file_name)
    else:
        raise Exception('File: ' + file_name + ' might be corrupted. Try downloading with browser.')
        
def resize_image(img_path, width, height, save=False):
    """Resize image (and save it), and return image by expanding dims"""
    image = Image.open(img_path)
    
    # PIL is column major so swap the places of width and height
    image = ImageOps.fit(image, size=(width, height), method=Image.ANTIALIAS)  # Returns a sized and cropped version of the image
    
    # Save resized image
    if save:
        img_dirs = img_path.split('/')
        img_dirs[-1] = 'resized_' + img_dirs[-1]  # ./img.jpg -> ./resized_img.jpg
        out_path = '/'.join(img_dirs)
        if not os.path.exists(out_path):
            image.save(out_path)
    
    # Return resized image after expanding dims
    image = np.asarray(image, dtype=np.float32)  # Convert input (PIL format) to array
    return np.expand_dims(image, axis=0)

def plot_images(content_img, style_img, generated_img):
    # Create figure with sub-plots
    fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(10, 10))
    
    # Adjust spacing
    fig.subplots_adjust(hspace=0.1, wspace=0.1)
    
    # Use interpolation to smooth pixels
    smooth = True
    
    if smooth:
        interpolation = 'sinc'
    else:
        interpolation = 'nearest'
        
    # Plot content image
    # NOTE: Pixel vales are divided by 255 to normalize to [0.0, 1.0] range 
    ax = axes.flat[0]
    ax.imshow(content_img / 255.0, interpolation=interpolation)
    ax.set_xlabel('Content')
    
    # Plot generated image
    ax = axes.flat[1]
    ax.imshow(generated_img / 255.0, interpolation=interpolation)
    ax.set_xlabel('Generated')
    
    # Plot style image
    ax = axes.flat[2]
    ax.imshow(style_img / 255.0, interpolation=interpolation)
    ax.set_xlabel('Style')
    
    # Remove ticks from all the plots
    for ax in axes.flat:
        ax.set_xticks([])
        ax.set_yticks([])
        
    plt.show()

### VGG-19 Model

In [3]:
# VGG-19 parameters file
VGG_DOWNLOAD_LINK = 'http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.mat'
VGG_FILENAME = 'imagenet-vgg-verydeep-19.mat'
EXPECTED_BYTES = 534904783

class VGG:
    def __init__(self, input_img):
        download_model(VGG_DOWNLOAD_LINK, VGG_FILENAME, EXPECTED_BYTES)
        self.input_img = input_img
        self.vgg_layers = scipy.io.loadmat(VGG_FILENAME)['layers']
        # VGG-19 trained with mean centered images with mean = [123.68, 116.779, 103.939] along RGB dimensions
        self.mean_pixels = np.array([123.68, 116.779, 103.939]).reshape((1, 1, 1, 3))
        
    def _weights(self, layer_idx, expected_layer_name):
        """
        Return the weights and biases at layer_idx from pre-trained VGG-19.
        """
        W = self.vgg_layers[0][layer_idx][0][0][2][0][0]
        b = self.vgg_layers[0][layer_idx][0][0][2][0][1]
        layer_name = self.vgg_layers[0][layer_idx][0][0][0][0]
        assert layer_name == expected_layer_name
        return W, b.reshape(b.size)
    
    def conv_relu(self, prev_layer, layer_idx, layer_name):
        """
        Create CONV layer with ReLU using the weights and biases extracted 
        from VGG-19 model at layer_idx.
        
        _weights returns numpy arrays, convert them to TF tensors.
        
        Args:
          prev_layer: output tensor from the previous layer
          layer_name: str, to name the layer. It is used to specify 
          variable scope.
        """
        with tf.variable_scope(layer_name, reuse=tf.AUTO_REUSE) as scope:
            W, b = self._weights(layer_idx, layer_name)
            W_tensor = tf.convert_to_tensor(W, np.float32, name='weight')
            b_tensor = tf.convert_to_tensor(b, np.float32, name='bias')
            conv = tf.nn.conv2d(prev_layer, 
                                filter=W_tensor, 
                                strides=[1, 1, 1, 1], 
                                padding='SAME')
            out = tf.nn.relu(conv + b_tensor)
        setattr(self, layer_name, out)  # set layer_name attribute as output tensor
    
    def avg_pool(self, prev_layer, layer_name):
        """
        Create the average pooling layer. According to paper (arXiv: 
        https://arxiv.org/pdf/1508.06576.pdf) replacing max-pooling with
        average pooling improves gradient flow.
        
        Args:
          prev_layer: output tensor from the previous layer
          layer_name: str, to name the layer. It is used to specify 
          variable scope.
        """
        with tf.variable_scope(layer_name, reuse=tf.AUTO_REUSE) as scope:
            out = tf.nn.avg_pool(prev_layer, 
                                 ksize=[1, 2, 2, 1], 
                                 strides=[1, 2, 2, 1], 
                                 padding='SAME')
            
        setattr(self, layer_name, out)  # set layer_name attribute as output tensor
        
    def load(self):
        self.conv_relu(self.input_img, 0, 'conv1_1')
        self.conv_relu(self.conv1_1, 2, 'conv1_2')
        self.avg_pool(self.conv1_2, 'avgpool1')
        self.conv_relu(self.avgpool1, 5, 'conv2_1')
        self.conv_relu(self.conv2_1, 7, 'conv2_2')
        self.avg_pool(self.conv2_2, 'avgpool2')
        self.conv_relu(self.avgpool2, 10, 'conv3_1')
        self.conv_relu(self.conv3_1, 12, 'conv3_2')
        self.conv_relu(self.conv3_2, 14, 'conv3_3')
        self.conv_relu(self.conv3_3, 16, 'conv3_4')
        self.avg_pool(self.conv3_4, 'avgpool3')
        self.conv_relu(self.avgpool3, 19, 'conv4_1')
        self.conv_relu(self.conv4_1, 21, 'conv4_2')
        self.conv_relu(self.conv4_2, 23, 'conv4_3')
        self.conv_relu(self.conv4_3, 25, 'conv4_4')
        self.avg_pool(self.conv4_4, 'avgpool4')
        self.conv_relu(self.avgpool4, 28, 'conv5_1')
        self.conv_relu(self.conv5_1, 30, 'conv5_2')
        self.conv_relu(self.conv5_2, 32, 'conv5_3')
        self.conv_relu(self.conv5_3, 34, 'conv5_4')
        self.avg_pool(self.conv5_4, 'avgpool5')

### Style Transfer

[A Neural Algorithm of Artistic Style (Gatys et al., 2016) 
](https://arxiv.org/pdf/1508.06576.pdf)

In [12]:
class StyleTransfer:
    
    def __init__(self, content_img, style_img, img_width, img_height):
        """
        Args:
            content_img: str, path to content image.
            style_img: str, path to style image.
            img_width: int, width for output image.
            img_height: ing, height for output image.
            
        NOTE: Input content image and input style image will be resized 
        to match img_width and img_height
        """
        self.img_width = img_width
        self.img_height = img_height
        self.content_img = resize_image(content_img, self.img_width, self.img_height)
        self.style_img = resize_image(style_img, self.img_width, self.img_height)
        self.initial_img = generate_noise_image(self.content_img, self.img_width, self.img_height)
        
        # Create global step and hyper-parameters for the model
        self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
        self.content_layer = 'conv4_2'  # Paper
        self.style_layers = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']  # Paper
        self.content_w = 0.01  # alpha/beta = 0.001 or 0.0001 but 1/20 or 1/50 works fine too.
        self.style_w = 1.0 
        self.style_layer_w = [0.5, 1.0, 1.5, 3.0, 4.0]  # More emphasis on deep layers
        self.lr = 2.0
        
    def create_input(self):
        """
        All the 3 inputs (content image, style image, generated image) have the 
        same dimensions and are inputs to same computation to extract the same set
        of features. To avoid assembling same sub-graphs multiple times, use one 
        variable for the 3 inputs.
        
        NOTE: image height corresponds to number of rows.
        """
        with tf.variable_scope('input', reuse=tf.AUTO_REUSE) as scope:
            self.input_img = tf.get_variable(name='input_img', 
                                             shape=[1, self.img_height, self.img_width, 3], 
                                             dtype=tf.float32, 
                                             initializer=tf.zeros_initializer())
            
    def load_vgg(self):
        """
        Load the saved model parameters of VGG-19. Use input_img as input to 
        compute the output at each layer of VGG.
        
        During training VGG-19, the images were mean centered with mean pixels
        to be [123.68, 116.779, 103.939] along RGB dimensions, so this should 
        be subtracted from images.
        """
        self.vgg = VGG(self.input_img)
        self.vgg.load()
        self.content_img -= self.vgg.mean_pixels  # subtract mean_pixels from content
        self.style_img -= self.vgg.mean_pixels  # subtract mean_pixels from style
        
    def _content_loss(self, P, F):
        """
        Calculate the loss between the feature representation of the 
        content image and the generated image.
        
        Args:
          P: Content representation of the content image
          F: Content representation of the generated image
        """
        # NOTE: P is content image content and will remain constant during training
        # however F be assigned initial_img and F will vary as training goes on 
        coefficient = tf.reciprocal(tf.cast(4 * tf.reduce_prod(P.shape), dtype=tf.float32))
        self.content_loss = coefficient * tf.reduce_sum(tf.square(P - F))
    
    def _gram_matrix(self, F, N, M):
        """
        Create and return the gram matrix for tensor F.
        """
        F_reshape = tf.reshape(F, shape=[N, M])
        return tf.matmul(F_reshape, tf.transpose(F_reshape))
    
    def _layer_style_loss(self, a, g):
        """
        Calculate the style loss at a certain layer.
        
        Args:
          a: feature representation of the style image at layer.
          g: feature representation of the generate image at layer.
        """
        M = tf.reduce_prod(a.shape[:3])
        N = a.shape[-1]
        a_gram = self._gram_matrix(a, N, M)
        g_gram = self._gram_matrix(g, N, M)
        coefficient = tf.reciprocal(tf.cast(4 * tf.square(N) * tf.square(M), dtype=tf.float32))
        return coefficient * tf.reduce_sum(tf.square(g_gram - a_gram))
    
    def _style_loss(self, A):
        """
        Calculate the total style loss as a weighted sum of style losses at 
        all style layers.
        """
        # NOTE: A is a list of different layer's style features computed from style image and 
        # will remain constant during training however `getattr` will compute style features
        # from initial_img and style features will vary as training goes on
        wE = [w * self._layer_style_loss(a, getattr(self.vgg, l)) for w, a, l in 
              zip(self.style_layer_w, A, self.style_layers)]
        self.style_loss = tf.reduce_sum(wE)
        
    def loss(self):
        """
        Calculate loss: alpha * content_loss + beta * style_loss
        """
        with tf.variable_scope('loss', reuse=tf.AUTO_REUSE) as scope:
            with tf.Session() as sess:
                # Assign content image to input variable
                sess.run(self.input_img.assign(self.content_img))
                generate_img_content = getattr(self.vgg, self.content_layer)  # Tensor
                content_img_content = sess.run(generate_img_content)
                print('content_img_content', np.sum(content_img_content))
            self._content_loss(content_img_content, generate_img_content)
            
            with tf.Session() as sess:
                # Assign style image to input variable
                sess.run(self.input_img.assign(self.style_img))
                style_layers_features = sess.run([getattr(self.vgg, layer) 
                                                  for layer in self.style_layers])
                
                print('style_layers_features', np.sum([np.sum(f) for f in style_layers_features]))
                
            self._style_loss(style_layers_features)
                
        # alpha * content loss + beta * style loss   
        self.total_loss = self.content_w * self.content_loss + self.style_w * self.style_loss
        
    def optimize(self):
        """Gradient Descent Optimizer"""
        self.optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.lr).minimize(self.total_loss, global_step=self.global_step)
        
    def summarize(self):
        with tf.name_scope("summaries"):
            tf.summary.scalar("content_loss", self.content_loss)
            tf.summary.scalar("style_loss", self.style_loss)
            tf.summary.scalar("combined_loss", self.total_loss)
            # Merge multiple summaries 
            self.summary_op = tf.summary.merge_all()
            
    def setup(self):
        make_dir('checkpoints')
        make_dir('output')
        
    def build(self):
        self.setup()
        self.create_input()
        self.load_vgg()
        self.loss()
        self.optimize()
        self.summarize()
        
    def train(self, n_iters):
        skip_step = 1
        with tf.Session() as sess:
            # Initialize variables
            sess.run(tf.global_variables_initializer())
            
            # Writer to write variables
            writer = tf.summary.FileWriter('./graphs/style', sess.graph)
            
            # Saver to save checkpoints
            saver = tf.train.Saver()
            
            # Get checkpoint state
            ckpt = tf.train.get_checkpoint_state(os.path.dirname('./checkpoints/style'))
            
            # If checkpoint exists then restore from checkpoint
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            
            # Generate noise image and assign it to input_img
            sess.run(self.input_img.assign(self.initial_img))
            
            # Starting point for training loop
            initial_step = self.global_step.eval()
            
            start_time = time.time()
            
            for i in range(initial_step, n_iters):
                if i >= 5 and i < 20:
                    skip_step = 10
                elif i >= 20:
                    skip_step = 20
                    
                _ = sess.run(self.optimizer)  # Optimize and update
                
                if (i + 1) % skip_step == 0:
                    # Get generated image, loss, and summary
                    generated_img, total_loss, summary = sess.run([self.input_img, 
                                                                   self.total_loss, 
                                                                   self.summary_op])
                    
                    # Add back the mean pixes that were subtracted earlier
                    generated_img += self.vgg.mean_pixels
                    
                    # Plot content, style and generated images
                    # plot_images(self.content_img, self.style_img, generated_img)
                    
                    writer.add_summary(summary, global_step=i)
                    print('Step {}\n \tSum: {:5.1f}'.format(i+1, np.sum(generated_img)))
                    print('\tLoss: {:5.1f}'.format(total_loss))
                    print('\tDuration: {} seconds'.format(time.time() - start_time))
                    start_time = time.time()  # Reset start time
                    
                    # Save generated image
                    filename = './output/st_{}.jpg'.format(i)
                    save_image(filename, generated_img)
                    
                    if (i + 1) % 20 == 0:
                        # Save the variables into a checkpoint
                        saver.save(sess, 'checkpoints/style', i)

In [13]:
# Transfer Style
model = StyleTransfer('./data/priel-morgan.jpg', './data/flowers.jpg', img_width=224, img_height=224)
model.build()
model.train(1)

VGG-19 pre-trained model is ready.
content_img_content 63156830.0
style_layers_features 428224540.0
Step 1
 	Sum:   nan
	Loss:   nan
	Duration: 25.613046884536743 seconds


`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.
  if sys.path[0] == '':


OSError: [Errno 28] No space left on device

# Scratch

In [20]:
# Print Conv layer index and layer names
download_model(VGG_DOWNLOAD_LINK, VGG_FILENAME, EXPECTED_BYTES)
vgg = scipy.io.loadmat(VGG_FILENAME)
print(vgg.keys())

for i in range(vgg['layers'].shape[1]):
    try:
        name = vgg['layers'][0][i][0][0][0][0]
        w = vgg['layers'][0][i][0][0][2][0][0]
        w_shape = np.array(w).shape
        if 'pool' in name:
            print('Index: [{}] -> Layer: "{}" W shape: {}'.format(i, name, w_shape))
        else:
            continue
    except IndexError:
        print('IndexError')

VGG-19 pre-trained model is ready.
dict_keys(['__header__', '__version__', '__globals__', 'layers', 'meta'])
Index: [4] -> Layer: "pool1" W shape: ()
Index: [9] -> Layer: "pool2" W shape: ()
Index: [18] -> Layer: "pool3" W shape: ()
Index: [27] -> Layer: "pool4" W shape: ()
Index: [36] -> Layer: "pool5" W shape: ()
IndexError


In [53]:
class A:
    def __init__(self, x):
        self.x = x
        
    def x_square(self):
        out = self.x ** 2
        setattr(self, 'x_sq', out)
        
    def load(self):
        self.x_square()
        
a = A(3)
print(vars(a))

# After setting attribute
a.load()
print(vars(a))

{'x': 3}
{'x': 3, 'x_sq': 9}


In [152]:
a = np.random.uniform(size=(3, 3, 3, 4))
print(a[:, 1, 1, 1])

# at = tf.convert_to_tensor(a, tf.float32)
at = tf.constant(a)
with tf.Session() as sess:
    print(sess.run(at[:, 1, 1, 1]))

with tf.variable_scope(name_or_scope='a', reuse=tf.AUTO_REUSE) as scope:
    w = tf.get_variable(name='w', initializer=at, trainable=False)
    
# with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
#     print(sess.run(w[:, 1, 1, 1]))
#     print(sess.run(tf.equal(w, at)))

[0.33907663 0.76559658 0.58313098]
[0.33907663 0.76559658 0.58313098]


In [7]:
a = np.random.uniform(size=(2, 2, 3))
a

array([[[0.44097298, 0.82561802, 0.06120844],
        [0.94815193, 0.28613082, 0.29290534]],

       [[0.97938947, 0.0907233 , 0.32692558],
        [0.05042714, 0.6130169 , 0.37729897]]])

In [82]:
a = np.array([[[2, 2], [2, 2]], [[2, 2], [2, 2]]])
a = np.expand_dims(a, axis=0)
b = np.array([[[5, 5], [5, 5]], [[5, 5], [5, 5]]])
b = np.expand_dims(b, axis=0)
np.sum(np.square(a - b))

72

In [29]:
# Load image
img = resize_image('./data/flowers.jpg', height=224, width=224, save=False)
print(img.shape)

with tf.variable_scope('input', reuse=tf.AUTO_REUSE) as scope:
    input_img = tf.get_variable(name='image', 
                                shape=[1, 224, 224, 3], 
                                dtype=tf.float32, 
                                initializer=tf.zeros_initializer())
    
vgg = VGG(input_img)
vgg.load()

(1, 224, 224, 3)
VGG-19 pre-trained model is ready.


In [97]:
c_feat = getattr(vgg, 'conv4_2')
g_feat = c_feat * 0.5
den = tf.cast(4 * tf.reduce_prod(g_feat.shape), dtype=tf.float32)
l = tf.reduce_sum(tf.square(g_feat - c_feat)) / den

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    cf = sess.run(c_feat)
    s = tf.reduce_prod(c_feat.shape).eval()
    print(s)
    e = sess.run(l)
    
print(cf.shape)

gf = np.random.uniform(size=(1, 224, 224, 64))
print(gf.shape)
e

401408
(1, 28, 28, 512)
(1, 224, 224, 64)


4.294784

In [69]:
a = np.array([[[2, 2, 2], [2, 2, 2]], [[2, 2, 2], [2, 2, 2]]])
a = np.expand_dims(a, axis=0)
b = np.array([[[5, 5, 5], [5, 5, 5]], [[5, 5, 5], [5, 5, 5]]])
b = np.expand_dims(b, axis=0)
a.shape, b.shape

In [79]:
ar = np.reshape(a, newshape=(3, -1))
br = np.reshape(b, newshape=(3, -1))
ar.shape, br.shape

((3, 4), (3, 4))

In [86]:
# Gram matrix
ar.dot(br.T)

array([[40, 40, 40],
       [40, 40, 40],
       [40, 40, 40]])

In [107]:
c_feat.shape

TensorShape([Dimension(1), Dimension(28), Dimension(28), Dimension(512)])

In [113]:
M = tf.reduce_prod(c_feat.shape[:3])
N = c_feat.shape[-1]
with tf.Session() as sess:
    print(sess.run(M))
    print(N)

784
512


In [118]:
cr = tf.reshape(c_feat, shape=[N, M])
g = tf.matmul(cr, tf.transpose(cr))

**How `getattr` works when there are 3 inputs: Fix 2 inputs, and vary 3rd during training)**

In [18]:
class ST:
    def __init__(self, content_img, img_width, img_height):
        self.img_width = img_width
        self.img_height = img_height
        self.content_img = resize_image(content_img, self.img_width, self.img_height)
        self.initial_img = generate_noise_image(self.content_img, self.img_width, self.img_height)
        self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
        self.content_layer = 'conv4_2'
    
    def check_tensor(self, tensor):
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            out = sess.run(tensor)
        
    def create_input(self):
        with tf.variable_scope('input', reuse=tf.AUTO_REUSE) as scope:
            self.input_img = tf.get_variable(name='input_img', 
                                             shape=[1, self.img_height, self.img_width, 3], 
                                             dtype=tf.float32, 
                                             initializer=tf.zeros_initializer())
            
    def load_vgg(self):
        self.vgg = VGG(self.input_img)
        self.vgg.load()
              
    def _content_loss(self, P, F):
        coefficient = tf.reciprocal(tf.cast(4 * tf.reduce_prod(P.shape), dtype=tf.float32))
        self.content_loss = coefficient * tf.reduce_sum(tf.square(P - F))
        
    def loss(self):
        with tf.variable_scope('loss', reuse=tf.AUTO_REUSE) as scope:
            with tf.Session() as sess:
                # Assign content image to input variable
                sess.run(self.input_img.assign(self.content_img))
                generate_img_content = getattr(self.vgg, self.content_layer)  # Tensor
                content_img_content = sess.run(generate_img_content)
                print('P: ', np.sum(content_img_content))
                print('F: ', np.sum(self.check_tensor(generate_img_content)))
            self._content_loss(content_img_content, generate_img_content)
                
        self.total_loss = self.content_loss 
        
    def optimize(self):
        self.optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(self.total_loss, global_step=self.global_step)

    def build(self):
        self.create_input()
        self.load_vgg()
        self.loss()
        self.optimize()
        
    def train(self, n_iters):
        skip_step = 1
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(self.input_img.assign(self.initial_img))
            initial_step = self.global_step.eval()            
            for i in range(initial_step, n_iters):   
                _ = sess.run(self.optimizer)  # Optimize and update
                generated_img, total_loss = sess.run([self.input_img, 
                                                      self.total_loss])

In [19]:
model = ST('./data/flowers.jpg', 224, 224)
model.build()

VGG-19 pre-trained model is ready.
P:  75623784.0
F:  None
