In [1]:
import tensorflow as tf
import numpy as np 
import scipy.io  
import argparse
import datetime
import struct
import string
import random
import errno
import time                       
import cv2
import os

Utils for building the model

In [2]:
# Pooling variant read by pool_layer(): 'avg' selects tf.nn.avg_pool, 'max' selects tf.nn.max_pool.
pooling_type = 'avg'

def conv_layer(layer_name, layer_input, W):
    """Apply a stride-1, SAME-padded 2-D convolution and log the shapes involved.

    Args:
        layer_name: Label used only in the printed log line.
        layer_input: Tensor passed as the input of ``tf.nn.conv2d``.
        W: Filter tensor passed to ``tf.nn.conv2d``.

    Returns:
        The raw convolution output (no bias added, no activation applied).
    """
    output = tf.nn.conv2d(layer_input, W, strides=[1, 1, 1, 1], padding='SAME')
    summary = '--{} | shape={} | weights_shape={}'.format(
        layer_name, output.get_shape(), W.get_shape())
    print(summary)
    return output

def relu_layer(layer_name, layer_input, b):
    """Add the bias ``b`` to ``layer_input`` and apply ``tf.nn.relu``.

    ``layer_name`` is accepted for symmetry with the other layer helpers but
    is not used here.
    """
    biased = layer_input + b
    return tf.nn.relu(biased)

def pool_layer(layer_name, layer_input):
    """Downsample ``layer_input`` with a 2x2 window and stride 2 (SAME padding).

    The pooling operator is chosen by the module-level ``pooling_type``:
    ``'avg'`` uses ``tf.nn.avg_pool`` and ``'max'`` uses ``tf.nn.max_pool``.
    ``layer_name`` is not used here.

    Raises:
        ValueError: if ``pooling_type`` is neither ``'avg'`` nor ``'max'``.
            (Previously this fell through and failed with an unbound local.)
    """
    if pooling_type == 'avg':
        pool = tf.nn.avg_pool(layer_input, ksize=[1, 2, 2, 1],
            strides=[1, 2, 2, 1], padding='SAME')
    elif pooling_type == 'max':
        pool = tf.nn.max_pool(layer_input, ksize=[1, 2, 2, 1],
            strides=[1, 2, 2, 1], padding='SAME')
    else:
        raise ValueError(
            "pooling_type must be 'avg' or 'max', got {!r}".format(pooling_type))
    return pool

def get_weights(vgg_layers, i):
    """Return entry ``i``'s weights from ``vgg_layers`` as a ``tf.constant``.

    The value is read from ``vgg_layers[i][0][0][2][0][0]``; the nesting
    presumably mirrors the layout of the loaded .mat file (confirm against
    the weights file in use).
    """
    layer_params = vgg_layers[i][0][0][2][0]
    return tf.constant(layer_params[0])

def get_bias(vgg_layers, i):
    """Return entry ``i``'s bias from ``vgg_layers`` as a flat ``tf.constant``.

    The value is read from ``vgg_layers[i][0][0][2][0][1]`` and reshaped to a
    1-D vector of ``bias.size`` elements before being wrapped.
    """
    raw_bias = vgg_layers[i][0][0][2][0][1]
    flat_bias = np.reshape(raw_bias, raw_bias.size)
    return tf.constant(flat_bias)

Function to build the vgg19 model

In [3]:
def build_model(input_img):
    """Build the VGG-19 feature-extraction graph and return its layers by name.

    The weights are loaded from ``'imagenet-vgg-verydeep-19.mat'`` (relative
    path). The network input is a ``tf.Variable`` of zeros with shape
    ``(1, h, w, d)`` taken from ``input_img.shape``, stored under ``'input'``.

    Args:
        input_img: Array whose shape unpacks as ``(_, h, w, d)``; only the
            shape is used.

    Returns:
        dict mapping ``'input'``, ``'convG_K'``, ``'reluG_K'`` and ``'poolG'``
        to the corresponding graph nodes, inserted in network order.
    """
    print('\nBUILDING VGG-19 NETWORK')
    net = {}
    _, h, w, d = input_img.shape

    print('loading model weights...')
    vgg_layers = scipy.io.loadmat('imagenet-vgg-verydeep-19.mat')['layers'][0]
    print('constructing layers...')
    net['input'] = tf.Variable(np.zeros((1, h, w, d), dtype=np.float32))

    # (group number, indices into vgg_layers of that group's conv entries)
    layer_groups = (
        (1, (0, 2)),
        (2, (5, 7)),
        (3, (10, 12, 14, 16)),
        (4, (19, 21, 23, 25)),
        (5, (28, 30, 32, 34)),
    )

    previous = net['input']
    for group, conv_indices in layer_groups:
        print('LAYER GROUP {}'.format(group))
        for position, vgg_index in enumerate(conv_indices, start=1):
            conv_name = 'conv{}_{}'.format(group, position)
            relu_name = 'relu{}_{}'.format(group, position)
            net[conv_name] = conv_layer(
                conv_name, previous, W=get_weights(vgg_layers, vgg_index))
            net[relu_name] = relu_layer(
                relu_name, net[conv_name], b=get_bias(vgg_layers, vgg_index))
            previous = net[relu_name]
        # Each group ends with one pooling layer fed by its last ReLU.
        pool_name = 'pool{}'.format(group)
        net[pool_name] = pool_layer(pool_name, previous)
        previous = net[pool_name]

    return net

Content loss function

In [4]:
# Selects the normalisation constant K used by content_layer_loss():
#   1 -> 1 / (2 * sqrt(N) * sqrt(M)),  2 -> 1 / (N * M),  3 -> 1 / 2
content_loss_function = 1

def content_layer_loss(p, x):
    """Scaled sum of squared differences between two feature maps.

    With ``M = height * width`` and ``N = depth`` taken from ``p``'s static
    shape, the loss is ``K * sum((x - p) ** 2)`` where ``K`` depends on the
    module-level ``content_loss_function``:

    * ``1`` -> ``1 / (2 * sqrt(N) * sqrt(M))``
    * ``2`` -> ``1 / (N * M)``
    * ``3`` -> ``1 / 2``

    Args:
        p: Target features; ``get_shape()`` must unpack as ``(_, h, w, d)``
            with each dimension exposing ``.value``.
        x: Features of the image being optimised.

    Raises:
        ValueError: if ``content_loss_function`` is not 1, 2 or 3.
            (Previously ``K`` was simply left undefined in that case.)
    """
    _, h, w, d = p.get_shape()
    M = h.value * w.value
    N = d.value
    if content_loss_function == 1:
        K = 1. / (2. * N**0.5 * M**0.5)
    elif content_loss_function == 2:
        K = 1. / (N * M)
    elif content_loss_function == 3:
        K = 1. / 2.
    else:
        raise ValueError(
            'content_loss_function must be 1, 2 or 3, got {!r}'.format(
                content_loss_function))
    loss = K * tf.reduce_sum(tf.pow((x - p), 2))
    return loss

def sum_content_losses(sess, net, content_img):
    """Build the weighted, averaged content loss over ``content_layers``.

    ``content_img`` is assigned to ``net['input']`` and, for every layer in
    the module-level ``content_layers``, the layer's current value is
    evaluated and frozen as the target. The per-layer losses are weighted by
    ``content_layer_weights`` and the sum is divided by the number of content
    layers.
    """
    sess.run(net['input'].assign(content_img))
    accumulated = 0.
    for layer_name, layer_weight in zip(content_layers, content_layer_weights):
        # Evaluate once to capture the content image's features as a constant.
        target = tf.convert_to_tensor(sess.run(net[layer_name]))
        generated = net[layer_name]
        accumulated += content_layer_loss(target, generated) * layer_weight
    return accumulated / float(len(content_layers))

Style loss function

In [5]:
def style_layer_loss(a, x):
    """Scaled squared distance between the Gram matrices of ``a`` and ``x``.

    With ``M = height * width`` and ``N = depth`` read from ``a``'s static
    shape, returns ``sum((G - A) ** 2) / (4 * N**2 * M**2)`` where ``A`` and
    ``G`` are ``gram_matrix`` of ``a`` and ``x`` respectively.
    """
    _, height, width, depth = a.get_shape()
    area = height.value * width.value
    channels = depth.value
    target_gram = gram_matrix(a, area, channels)
    current_gram = gram_matrix(x, area, channels)
    scale = (1./(4 * channels**2 * area**2))
    return scale * tf.reduce_sum(tf.pow((current_gram - target_gram), 2))

def gram_matrix(x, area, depth):
    """Return ``F^T F`` where ``F`` is ``x`` reshaped to ``(area, depth)``."""
    features = tf.reshape(x, (area, depth))
    return tf.matmul(tf.transpose(features), features)

def sum_style_losses(sess, net, style_imgs):
    """Build the style loss averaged over all style images.

    For each style image: assign it to ``net['input']``, evaluate every layer
    in the module-level ``style_layers`` to obtain the target features, and
    accumulate ``style_layer_loss`` weighted by ``style_layer_weights``; the
    per-image loss is divided by the number of style layers. The per-image
    losses are then summed with weight 1.0 each and divided by the number of
    style images.

    Every image in ``style_imgs`` now contributes. The previous hard-coded
    one-element weight list made ``zip`` silently drop all images after the
    first while the result was still divided by ``len(style_imgs)``. Results
    for a single style image are unchanged.

    Raises:
        ValueError: if ``style_imgs`` is empty.
    """
    if len(style_imgs) == 0:
        raise ValueError('style_imgs must contain at least one image')

    total_style_loss = 0.
    # One weight per image so zip() cannot truncate the image list.
    weights = [1.0] * len(style_imgs)

    for img, img_weight in zip(style_imgs, weights):
        sess.run(net['input'].assign(img))
        style_loss = 0.

        for layer, weight in zip(style_layers, style_layer_weights):
            # Freeze the style image's activations as the target.
            a = tf.convert_to_tensor(sess.run(net[layer]))
            x = net[layer]
            style_loss += style_layer_loss(a, x) * weight

        style_loss /= float(len(style_layers))
        total_style_loss += (style_loss * img_weight)

    total_style_loss /= float(len(style_imgs))

    return total_style_loss

Utils and io functions

In [6]:
def read_image(path):
    """Load the image at ``path`` with OpenCV and return it preprocessed.

    The image is read in colour (BGR order), validated with ``check_image``,
    converted to float32 and passed through ``preprocess``.
    """
    bgr = cv2.imread(path, cv2.IMREAD_COLOR)
    check_image(bgr, path)
    return preprocess(bgr.astype(np.float32))

def write_image(path, img):
    """Run ``img`` through ``postprocess`` and write the result to ``path``."""
    cv2.imwrite(path, postprocess(img))

def preprocess(img):
    """Convert an ``(h, w, 3)`` BGR image into a mean-centred ``(1, h, w, 3)`` RGB batch.

    The channel axis is reversed (BGR -> RGB), a leading batch axis is added
    and the constant ``[123.68, 116.779, 103.939]`` is subtracted per channel.

    The input array is left untouched: the original subtracted in place on a
    view of ``img``, which silently modified the caller's data. The returned
    values and dtype are the same as before.
    """
    mean_rgb = np.array([123.68, 116.779, 103.939]).reshape((1, 1, 1, 3))
    # Copy the reversed/expanded view so the in-place subtraction below does
    # not write through to the caller's array.
    batch = img[np.newaxis, :, :, ::-1].copy()
    batch -= mean_rgb
    return batch

def postprocess(img):
    """Convert a mean-centred ``(1, h, w, 3)`` RGB batch back to an ``(h, w, 3)`` uint8 BGR image.

    Adds ``[123.68, 116.779, 103.939]`` per channel, drops the batch axis,
    clips to ``[0, 255]``, casts to ``uint8`` and reverses the channel axis.

    The input array is left untouched: the original added the mean in place,
    so post-processing the same array twice shifted it twice. The returned
    values are the same as before.
    """
    mean_rgb = np.array([123.68, 116.779, 103.939]).reshape((1, 1, 1, 3))
    # Work on a private copy; the in-place add keeps the input's dtype
    # semantics exactly as before.
    restored = np.array(img, copy=True)
    restored += mean_rgb
    # shape (1, h, w, d) to (h, w, d)
    restored = restored[0]
    restored = np.clip(restored, 0, 255).astype('uint8')
    # rgb to bgr
    return restored[..., ::-1]

def read_flow_file(path):
    """Read a binary flow file into a ``(2, h, w)`` float32 array.

    Layout as consumed here: a 4-byte header (skipped), a native-endian int32
    width, a native-endian int32 height, then ``h * w`` pairs of float32
    stored row by row. The first value of each pair goes to ``flow[0, y, x]``
    and the second to ``flow[1, y, x]``.

    The payload is decoded in one vectorised step instead of one
    ``struct.unpack`` call per float.

    Raises:
        struct.error: if the width/height fields are truncated.
        ValueError: if a dimension is negative or the payload is shorter than
            ``2 * h * w`` floats.
    """
    with open(path, 'rb') as f:
        # 4 bytes header (not interpreted)
        f.read(4)
        # 4 bytes width, height
        w = struct.unpack('i', f.read(4))[0]
        h = struct.unpack('i', f.read(4))[0]
        if w < 0 or h < 0:
            raise ValueError(
                'invalid flow dimensions {}x{} in {}'.format(w, h, path))
        n_bytes = 2 * h * w * np.dtype(np.float32).itemsize
        payload = f.read(n_bytes)
    if len(payload) != n_bytes:
        raise ValueError(
            'flow file {} is truncated: expected {} payload bytes, got {}'.format(
                path, n_bytes, len(payload)))
    # On disk the two components are interleaved per pixel: (h, w, 2).
    interleaved = np.frombuffer(payload, dtype=np.float32).reshape(h, w, 2)
    # np.array copies, so the result is writable and C-contiguous.
    return np.array(interleaved.transpose(2, 0, 1), dtype=np.float32, order='C')

def read_weights_file(path):
    """Read a text weights file into a binary ``(h, w, 3)`` float32 mask.

    The first line holds space-separated integers, of which the first two are
    ``w`` and ``h``. Each following line is one row of space-separated
    numbers. Values below 255 become 0.0 and everything else becomes 1.0; the
    resulting ``(h, w)`` mask is stacked three times along a new last axis.

    The file is now closed deterministically (it was previously opened
    without ever being closed) and rows are thresholded with a vectorised
    comparison.
    """
    with open(path) as f:
        lines = f.readlines()
    header = [int(tok) for tok in lines[0].split(' ')]
    w = header[0]
    h = header[1]
    vals = np.zeros((h, w), dtype=np.float32)
    for row_idx, line in enumerate(lines[1:]):
        row = np.array([np.float32(tok) for tok in line.rstrip().split(' ')])
        # Same rule as before: 0. if value < 255. else 1.
        vals[row_idx] = np.where(row < 255., 0., 1.)
    # expand to 3 channels
    weights = np.dstack([vals] * 3)
    return weights

def normalize(weights):
    """Scale ``weights`` so they sum to 1.

    Returns a list of floats. If the sum is not greater than zero, a list of
    ``0.`` of the same length is returned instead.
    """
    total = sum(weights)
    if total > 0.:
        return list(map(lambda value: float(value) / total, weights))
    return [0.] * len(weights)

def maybe_make_directory(dir_path):
    """Create ``dir_path`` (including parents) unless the path already exists."""
    if os.path.exists(dir_path):
        return
    os.makedirs(dir_path)

def check_image(img, path):
    """Raise ``OSError(ENOENT)`` naming ``path`` when ``img`` is ``None``.

    Any other value of ``img`` is accepted and the function returns ``None``.
    """
    if img is not None:
        return
    raise OSError(errno.ENOENT, "No such file", path)
        
def get_content_image(content_img):
    """Load the content image, shrink it to at most 512 px per side, and preprocess it.

    Args:
        content_img: File name, joined onto ``'./'``.

    Returns:
        The result of ``preprocess`` on the float32 BGR image.

    Raises:
        OSError: via ``check_image`` when OpenCV cannot read the file.
    """
    path = os.path.join('./', content_img)
    # OpenCV returns the image in BGR channel order.
    pixels = cv2.imread(path, cv2.IMREAD_COLOR)
    check_image(pixels, path)
    pixels = pixels.astype(np.float32)

    max_side = 512
    height, width, _ = pixels.shape
    # Portrait image taller than the limit: pin the height, scale the width.
    if height > width and height > max_side:
        width = (float(max_side) / float(height)) * width
        pixels = cv2.resize(pixels, dsize=(int(width), max_side),
                            interpolation=cv2.INTER_AREA)
    # Image wider than the limit: pin the width, scale the height.
    if width > max_side:
        height = (float(max_side) / float(width)) * height
        pixels = cv2.resize(pixels, dsize=(max_side, int(height)),
                            interpolation=cv2.INTER_AREA)
    return preprocess(pixels)

def get_style_images(content_img, style_images):
    """Load each style image, resized to the content image's height and width.

    Args:
        content_img: Array whose shape unpacks as ``(_, h, w, d)``; only the
            height and width are used.
        style_images: Iterable of file names, each joined onto ``'./'``.

    Returns:
        List of preprocessed style images, in the order given.
    """
    _, target_h, target_w, _ = content_img.shape

    def load_one(style_fn):
        path = os.path.join('./', style_fn)
        # OpenCV returns the image in BGR channel order.
        bgr = cv2.imread(path, cv2.IMREAD_COLOR)
        check_image(bgr, path)
        resized = cv2.resize(bgr.astype(np.float32), dsize=(target_w, target_h),
                             interpolation=cv2.INTER_AREA)
        return preprocess(resized)

    return [load_one(style_fn) for style_fn in style_images]

def get_noise_image(noise_ratio, content_img):
    """Blend seeded uniform noise in [-20, 20) with ``content_img``.

    The global NumPy RNG is reseeded with 10 on every call, so the noise is
    identical between calls. The result is
    ``noise_ratio * noise + (1 - noise_ratio) * content_img``.

    NOTE: a function with the same name is defined again further down in this
    cell; once the whole cell has run, that later (unseeded) definition is the
    one in effect.
    """
    np.random.seed(10)
    noise = np.random.uniform(low=-20., high=20., size=content_img.shape)
    noise = noise.astype(np.float32)
    return noise_ratio * noise + (1. - noise_ratio) * content_img

def convert_to_original_colors(content_img, stylized_img):
    """Keep the stylised image's first channel and take the other two from the content image.

    Both images are passed through ``postprocess``, converted from BGR to the
    colour space named by the module-level ``color_convert_type`` (``'yuv'``,
    ``'ycrcb'``, ``'luv'`` or ``'lab'``), and split into channels. The first
    channel of the stylised image is merged with the second and third
    channels of the content image, converted back to BGR, cast to float32 and
    run through ``preprocess``.

    Raises:
        ValueError: if ``color_convert_type`` is not one of the four supported
            names. The check happens before either image is touched.
            (Previously an unknown name failed later with an unbound local.)
    """
    conversions = {
        'yuv': (cv2.COLOR_BGR2YUV, cv2.COLOR_YUV2BGR),
        'ycrcb': (cv2.COLOR_BGR2YCR_CB, cv2.COLOR_YCR_CB2BGR),
        'luv': (cv2.COLOR_BGR2LUV, cv2.COLOR_LUV2BGR),
        'lab': (cv2.COLOR_BGR2LAB, cv2.COLOR_LAB2BGR),
    }
    if color_convert_type not in conversions:
        raise ValueError(
            'color_convert_type must be one of {}, got {!r}'.format(
                sorted(conversions), color_convert_type))
    cvt_type, inv_cvt_type = conversions[color_convert_type]

    content_img = postprocess(content_img)
    stylized_img = postprocess(stylized_img)
    content_cvt = cv2.cvtColor(content_img, cvt_type)
    stylized_cvt = cv2.cvtColor(stylized_img, cvt_type)
    # First channel from the stylised image, remaining two from the content.
    c1, _, _ = cv2.split(stylized_cvt)
    _, c2, c3 = cv2.split(content_cvt)
    merged = cv2.merge((c1, c2, c3))
    dst = cv2.cvtColor(merged, inv_cvt_type).astype(np.float32)
    dst = preprocess(dst)
    return dst

def write_image_output(output_img, content_img, style_imgs, init_img):
    """Write the result, content, init and style images plus a settings file to ``'./'``.

    Files written: the module-level ``output_name``, ``content.png``,
    ``init.png``, ``style_<index>.png`` for each style image, and
    ``meta_data.txt`` with the run configuration read from module-level
    settings.

    The metadata file is now written inside a ``with`` block so the handle is
    closed even if one of the writes raises.
    """
    out_dir = './'
    maybe_make_directory(out_dir)
    img_path = os.path.join(out_dir, output_name)
    content_path = os.path.join(out_dir, 'content.png')
    init_path = os.path.join(out_dir, 'init.png')

    write_image(img_path, output_img)
    write_image(content_path, content_img)
    write_image(init_path, init_img)
    for index, style_img in enumerate(style_imgs):
        path = os.path.join(out_dir, 'style_'+str(index)+'.png')
        write_image(path, style_img)

    # save the configuration settings
    out_file = os.path.join(out_dir, 'meta_data.txt')
    with open(out_file, 'w') as f:
        f.write('image_name: {}\n'.format(output_name))
        # NOTE(review): content_img / style_img are formatted as-is; if they
        # are image arrays rather than file names, the array repr ends up in
        # the file. Confirm whether file names were intended.
        f.write('content: {}\n'.format(content_img))
        # NOTE(review): zipping with a one-element weight list means only the
        # first style image is listed here.
        for index, (style_img, weight) in enumerate(zip(style_imgs, [1.0])):
            f.write('styles['+str(index)+']: {} * {}\n'.format(weight, style_img))
        f.write('content_weight: {}\n'.format(content_weight))
        f.write('style_weight: {}\n'.format(style_weight))
        f.write('tv_weight: {}\n'.format(tv_weight))
        f.write('content_layers: {}\n'.format(content_layers))
        f.write('style_layers: {}\n'.format(style_layers))
        f.write('optimizer_type: {}\n'.format(optimizer_to_use))
        f.write('max_iterations: {}\n'.format(max_iterations))
    
def get_noise_image(noise_ratio, content_img, seed=None):
    """Blend uniform noise in [-20, 20) with ``content_img``.

    This definition overrides the earlier ``get_noise_image`` in the same
    cell, so it is the one used after the cell has run.

    Args:
        noise_ratio: Weight of the noise; ``1 - noise_ratio`` is the weight
            of ``content_img``.
        content_img: Array providing the output shape and the content term.
        seed: Optional seed. When given, the noise is drawn from a private
            ``np.random.RandomState(seed)`` and is reproducible. When ``None``
            (the default, matching the previous behaviour) the global NumPy
            RNG is used as before.

    Returns:
        ``noise_ratio * noise + (1 - noise_ratio) * content_img``.
    """
    rng = np.random if seed is None else np.random.RandomState(seed)
    noise_img = rng.uniform(-20., 20., content_img.shape).astype(np.float32)
    img = noise_ratio * noise_img + (1.-noise_ratio) * content_img
    return img

Style transfer algorithm

In [7]:
def stylize(content_img, style_imgs, init_img):
    """Run the full style-transfer optimisation and write the results to disk.

    Inside a new ``tf.Session`` pinned to ``'/gpu:0'``: builds the network,
    assembles the total loss as
    ``content_weight * L_content + style_weight * L_style + tv_weight * L_tv``,
    minimises it with the optimiser named by ``optimizer_to_use`` starting
    from ``init_img``, optionally restores the content image's colours when
    ``original_colors`` is set, and finally calls ``write_image_output``.
    """
    with tf.device('/gpu:0'), tf.Session() as sess:
        net = build_model(content_img)

        # Individual loss terms (style first, then content, as both
        # temporarily assign their image to the network input).
        style_term = sum_style_losses(sess, net, style_imgs)
        content_term = sum_content_losses(sess, net, content_img)
        tv_term = tf.image.total_variation(net['input'])

        L_total = (content_weight * content_term
                   + style_weight * style_term
                   + tv_weight * tv_term)

        optimizer = get_optimizer(L_total)

        if optimizer_to_use == 'adam':
            minimize_with_adam(sess, net, optimizer, init_img, L_total)
        elif optimizer_to_use == 'lbfgs':
            minimize_with_lbfgs(sess, net, optimizer, init_img)

        result = sess.run(net['input'])

        if original_colors:
            # Pass a copy so the content image itself is not altered.
            result = convert_to_original_colors(np.copy(content_img), result)

        write_image_output(result, content_img, style_imgs, init_img)

Optimizer and utils

In [8]:
def minimize_with_lbfgs(sess, net, optimizer, init_img):
    """Initialise variables, load ``init_img`` into the input and run ``optimizer.minimize(sess)``."""
    print('\nMINIMIZING LOSS USING: L-BFGS OPTIMIZER')
    sess.run(tf.global_variables_initializer())
    # The initializer resets the input variable, so assign the start image after it.
    sess.run(net['input'].assign(init_img))
    optimizer.minimize(sess)

def minimize_with_adam(sess, net, optimizer, init_img, loss):
    """Run ``max_iterations`` training steps on ``loss``, starting from ``init_img``.

    The current loss is printed on every 25th iteration (starting at 0) and
    on the final iteration.
    """
    print('\nMINIMIZING LOSS USING: ADAM OPTIMIZER')
    train_op = optimizer.minimize(loss)
    sess.run(tf.global_variables_initializer())
    # The initializer resets the input variable, so assign the start image after it.
    sess.run(net['input'].assign(init_img))
    step = 0
    while step < max_iterations:
        sess.run(train_op)
        is_last_step = step == max_iterations - 1
        if is_last_step or step % 25 == 0:
            print("At iterate {}\tf=  {}".format(step, loss.eval()))
        step += 1

def get_optimizer(loss):
    """Create the optimiser selected by the module-level ``optimizer_to_use``.

    * ``'lbfgs'`` -> ``tf.contrib.opt.ScipyOptimizerInterface`` minimising
      ``loss`` with method ``'L-BFGS-B'``, ``maxiter`` set to
      ``max_iterations`` and ``disp`` set to 25.
    * ``'adam'`` -> ``tf.train.AdamOptimizer(1)``; ``loss`` is not used here.

    Raises:
        ValueError: if ``optimizer_to_use`` is neither ``'lbfgs'`` nor
            ``'adam'``. (Previously this failed with an unbound local.)
    """
    print_iterations = 25
    if optimizer_to_use == 'lbfgs':
        return tf.contrib.opt.ScipyOptimizerInterface(
            loss, method='L-BFGS-B',
            options={'maxiter': max_iterations, 'disp': print_iterations})
    if optimizer_to_use == 'adam':
        return tf.train.AdamOptimizer(1)
    raise ValueError(
        "optimizer_to_use must be 'lbfgs' or 'adam', got {!r}".format(
            optimizer_to_use))

Image render

In [11]:
# Weights of the content, style and total-variation terms in the total loss.
content_weight = 5e1
style_weight = 1e4
tv_weight = 1e0

# Layers (and their per-layer weights) used for the style loss.
style_layers = ['relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1']
style_layer_weights = [0.2, 0.2, 0.2, 0.2, 0.2]

# Layers (and their per-layer weights) used for the content loss.
content_layers = ['conv5_2']
content_layer_weights = [1]

# Optimiser selection: 'adam' is in effect; 'lbfgs' is the other supported value.
optimizer_to_use = 'adam'

max_iterations = 1000

# When True, the content image's colours are restored using color_convert_type.
original_colors = False
color_convert_type = 'luv'

content_image_name = 'nice/cabelo.jpg'
style_image_name = 'images/starry-night.jpg'

In [None]:
# Build a unique output file name from the current time plus six random
# uppercase letters/digits.
time_string = datetime.datetime.fromtimestamp(time.time()).strftime('%m_%d_%H_%M_%S')
random_string = ''.join(random.choices(string.ascii_uppercase + string.digits, k=6))
output_name = 'out_%s_%s.png' % (time_string, random_string)

# Load the content image first; the style image is resized to match it.
content_img = get_content_image(content_image_name)
style_imgs = get_style_images(content_img, [style_image_name])
# Run the whole transfer inside a fresh default graph.
with tf.Graph().as_default():
    print('\n---- RENDERING SINGLE IMAGE ----\n')
    # noise_ratio of 1.0: the starting image is pure noise (content weight 0).
    init_img = get_noise_image(1.0, content_img)
    # Wall-clock timing of the stylize() call only.
    tick = time.time()
    stylize(content_img, style_imgs, init_img)
    tock = time.time()
    print('Single image elapsed time: {}'.format(tock - tick))


---- RENDERING SINGLE IMAGE ----


BUILDING VGG-19 NETWORK
loading model weights...
constructing layers...
LAYER GROUP 1
--conv1_1 | shape=(1, 512, 288, 64) | weights_shape=(3, 3, 3, 64)
--conv1_2 | shape=(1, 512, 288, 64) | weights_shape=(3, 3, 64, 64)
LAYER GROUP 2
--conv2_1 | shape=(1, 256, 144, 128) | weights_shape=(3, 3, 64, 128)
--conv2_2 | shape=(1, 256, 144, 128) | weights_shape=(3, 3, 128, 128)
LAYER GROUP 3
--conv3_1 | shape=(1, 128, 72, 256) | weights_shape=(3, 3, 128, 256)
--conv3_2 | shape=(1, 128, 72, 256) | weights_shape=(3, 3, 256, 256)
--conv3_3 | shape=(1, 128, 72, 256) | weights_shape=(3, 3, 256, 256)
--conv3_4 | shape=(1, 128, 72, 256) | weights_shape=(3, 3, 256, 256)
LAYER GROUP 4
--conv4_1 | shape=(1, 64, 36, 512) | weights_shape=(3, 3, 256, 512)
--conv4_2 | shape=(1, 64, 36, 512) | weights_shape=(3, 3, 512, 512)
--conv4_3 | shape=(1, 64, 36, 512) | weights_shape=(3, 3, 512, 512)
--conv4_4 | shape=(1, 64, 36, 512) | weights_shape=(3, 3, 512, 512)
LAYER GROUP 5
--