In [None]:
import os
import sys

import numpy as np 
import random
import time 

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

import PIL
from PIL import Image
from IPython import display

from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Start with vgg19
from tensorflow.keras.applications import vgg19

In [None]:
# NOTE(review): `%tensorflow_version` looks like a Google Colab line magic that
# selects the TensorFlow major version - confirm before running elsewhere.
%tensorflow_version 2.x
import tensorflow as tf
# Stop the notebook early unless TensorFlow reports a GPU as device 0.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

# IPython shell capture: run `nvidia-smi` and collect its output lines.
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
# The word 'failed' in the output is treated as "no GPU attached to this runtime".
if gpu_info.find('failed') >= 0:
    print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
    print('and then re-execute this cell.')
else:
    print(gpu_info)

In [None]:
# Pick a tf.distribute strategy: use a TPU if one can be resolved, otherwise
# fall back to TensorFlow's default strategy.
try: # detect TPUs
    # detect and init the TPU
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)

    # instantiate a distribution strategy
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except ValueError: # no TPU could be resolved - fall back to the default strategy
    #strategy = tf.distribute.MirroredStrategy() # for GPU or multi-GPU machines
    strategy = tf.distribute.get_strategy() # default strategy that works on CPU and single GPU
    #strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() # for clusters of multi-GPU machines

# NOTE(review): `strategy` and `AUTO` are not referenced again in the visible
# part of this notebook apart from the print below.
AUTO = tf.data.experimental.AUTOTUNE
print("Number of accelerators: ", strategy.num_replicas_in_sync)

# Directory prefixes (with trailing slash) that later cells prepend to
# input image names and output file names.
IMAGEDIR = 'ParentImages/'
OUTDIR = 'DualImages/'

In [None]:
def tensor_to_image(tensor):
    """Turn an image tensor into a PIL image.

    The values are multiplied by 255 and cast to uint8, so the input is
    expected to be a float image scaled to [0, 1]. An input with more than
    three dimensions must have a leading axis of length 1, which is dropped.
    """
    pixels = np.array(tensor * 255, dtype=np.uint8)
    has_batch_axis = np.ndim(pixels) > 3
    if has_batch_axis:
        # only a single-image batch can be converted
        assert pixels.shape[0] == 1
        pixels = pixels[0]
    return PIL.Image.fromarray(pixels)

def load_img_x(path_to_img, max_dim=512):
    """Read an image file into a batched float32 tensor.

    The file is decoded with three channels, converted to float32 with
    `tf.image.convert_image_dtype`, and rescaled (aspect ratio preserved up to
    integer truncation) so that its longer side becomes `max_dim` pixels.

    Args:
        path_to_img: path of the image file to read.
        max_dim: target length of the longer image side; defaults to 512,
            the value that used to be hard-coded.

    Returns:
        A tensor with a leading batch axis of length 1 added in front of the
        resized image.
    """
    img = tf.io.read_file(path_to_img)
    img = tf.image.decode_image(img, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)

    # height/width as floats so the scale factor can be applied directly
    shape = tf.cast(tf.shape(img)[:-1], tf.float32)
    # tf.reduce_max instead of the Python builtin `max`, which only works by
    # iterating over the tensor in eager mode
    long_dim = tf.reduce_max(shape)
    scale = max_dim / long_dim
    new_shape = tf.cast(shape * scale, tf.int32)
    img = tf.image.resize(img, new_shape)
    # add the batch axis
    img = img[tf.newaxis, :]
    return img

In [None]:
# this is the primary image to explore
content_path = IMAGEDIR+'Bacchus.jpg'

# load_img_x returns a batched float image; scale it back to 0-255 before the
# VGG19 preprocessing, then resize to 224x224 for the classifier
content = load_img_x(content_path)
tnew = vgg19.preprocess_input(content*255)
tnew = tf.image.resize(tnew, (224, 224))
# first test full VGG19
model = vgg19.VGG19(include_top=True, weights='imagenet')
probs = model(tnew)
# NOTE(review): a bare expression in the middle of a cell is not displayed
probs.shape
top_5 = vgg19.decode_predictions(probs.numpy())[0]
# the last expression is the cell output: (class name, probability) pairs
[(class_name, prob) for (number, class_name, prob) in top_5]

In [None]:
# well, he is a welshie, but we can forgive mistaking him for an airedale
# rebind `model` to VGG19 without the classification head (include_top=False)
model = vgg19.VGG19(include_top=False, weights='imagenet')

In [None]:
# loss functions
## content loss
def mat_loss(target, combo, scale = 1):
    """Sum of squared element-wise differences of `target` and `combo`, divided by `scale`."""
    squared_diff = K.square(target - combo)
    total = K.sum(squared_diff)
    return total / scale

""" The gram matrix captures the style of an image.
    In essence it takes a feature map of size h x w x n_f, where h and w are the height and
    width of the map and n_f is the number of layer features, and converts it into an
    n_f x n_f matrix that measures how strongly each pair of layer features is present
    together in that image.  Minimizing the distance between the gram matrices of the
    style and combo images is the style loss.
"""
def gram_matrix(x):
    """Per-batch channel-by-channel Gram matrix of a 4-D feature tensor.

    With `x` indexed as (b, i, j, c), the result has entries
    G[b, c, d] = sum over i, j of x[b, i, j, c] * x[b, i, j, d].
    No normalisation by the number of positions is applied.
    """
    features = x
    return tf.linalg.einsum('bijc,bijd->bcd', features, features)

## style loss
## in practice, style loss is computed using mat_loss above
def style_loss(style, combo):
    """Sum of squared differences between the Gram matrices of `style` and `combo`."""
    gram_difference = gram_matrix(style) - gram_matrix(combo)
    return K.sum(K.square(gram_difference))

## total variational loss
## encourages pixel continuity in the combo image
def variational_loss(combo, diag_weight = 0.5):
    """Smoothness penalty on a 4-D tensor `combo`.

    Adds up the squared differences between elements that are neighbours
    along axis 1 and along axis 2, plus `diag_weight` times the squared
    differences across the two diagonals of each 2x2 neighbourhood.
    """
    anchor = combo[:, 1:, 1:, :]
    along_axis1 = K.square(combo[:, :-1, 1:, :] - anchor)
    along_axis2 = K.square(combo[:, 1:, :-1, :] - anchor)
    main_diag = K.square(combo[:, :-1, :-1, :] - anchor)
    anti_diag = K.square(combo[:, :-1, 1:, :] - combo[:, 1:, :-1, :])
    # several sources raise this to the 1.25 power - I have no idea why and have been unable
    # to locate an original source
    return K.sum(along_axis1 + along_axis2 + diag_weight * (main_diag + anti_diag))

# clip to float 0-1
def clip_f(image):
    """Clamp every value of `image` to the closed interval [0.0, 1.0]."""
    lower, upper = 0.0, 1.0
    return tf.clip_by_value(image, clip_value_min=lower, clip_value_max=upper)

# Dual style transfer
Transfer the coarse features from one source image and the fine features from another source image.

In [None]:
# what are the vgg19 layer names
# (prints the layer table of `model`; the layer lists below use these names)
model.summary()

In [None]:
# Map each layer name of `model` to that layer's output tensor.
# NOTE(review): `layer_dict` is not used again in the visible part of the notebook.
layer_dict = dict([(layer.name,layer.output) for layer in model.layers])
# VGG19 layers used for the "fine" style term
fine_style_layers = ['block1_conv1','block1_conv2','block2_conv2']
# VGG19 layers used for the "coarse" style term
coarse_style_layers = ['block3_conv2','block3_conv4','block4_conv1',
                       'block4_conv3','block5_conv1','block5_conv2']
# VGG19 layer used for the content term
content_layers = ['block5_conv3']

# layer counts for each group
num_content_layers = len(content_layers)
num_fine_style_layers = len(fine_style_layers)
num_coarse_style_layers = len(coarse_style_layers)

# get untrainable vgg19 for layer extraction
def vgg_layers(layer_names):
    """Build a frozen VGG19 model that outputs the requested intermediate layers.

    A headless (include_top=False) ImageNet-weighted VGG19 is created, marked
    non-trainable, and wrapped in a `Model` whose outputs are the outputs of
    the layers named in `layer_names`, in the same order.
    """
    backbone = vgg19.VGG19(include_top=False, weights='imagenet')
    backbone.trainable = False
    selected_outputs = []
    for layer_name in layer_names:
        selected_outputs.append(backbone.get_layer(layer_name).output)
    return Model([backbone.input], selected_outputs)

# this is the heavy lifter (adapted from tf website)
class DualNeuralStyleTransferModel(Model):
    """Feature extractor for dual style transfer.

    Wraps a frozen VGG19 (built by `vgg_layers`) and, for an input image,
    returns Gram matrices of the fine and coarse style layers together with
    the raw activations of the content layers.
    """

    def __init__(self, fine_style_layers, coarse_style_layers, content_layers):
        """Store the three groups of layer names and build the VGG19 extractor.

        The extractor outputs the layers in the order fine style, coarse
        style, content; `call` relies on that order to split them again.
        """
        super().__init__()
        all_layers = fine_style_layers + coarse_style_layers + content_layers
        self.vgg = vgg_layers(all_layers)
        self.fine_style_layers = fine_style_layers
        self.coarse_style_layers = coarse_style_layers
        self.content_layers = content_layers
        self.num_fine_style_layers = len(fine_style_layers)
        self.num_coarse_style_layers = len(coarse_style_layers)
        self.num_content_layers = len(content_layers)
        self.vgg.trainable = False

    def call(self, inputs):
        """Extract style Gram matrices and content features.

        `inputs` is expected to be a float image in [0, 1]; it is rescaled to
        0-255 and passed through the VGG19 preprocessing before extraction.

        Returns a dict with keys 'content', 'fstyle' and 'cstyle', each
        mapping layer names to the corresponding tensor.
        """
        # float input in [0,1] -> 0-255 range expected by preprocess_input
        scaled = inputs * 255.0
        features = self.vgg(vgg19.preprocess_input(scaled))

        # the extractor outputs are ordered: fine style, coarse style, content
        fine_end = self.num_fine_style_layers
        coarse_end = fine_end + self.num_coarse_style_layers

        # style layers are summarised by their gram matrices
        fine_grams = [gram_matrix(feature) for feature in features[:fine_end]]
        coarse_grams = [gram_matrix(feature) for feature in features[fine_end:coarse_end]]
        content_features = features[coarse_end:]

        return {
            'content': dict(zip(self.content_layers, content_features)),
            'fstyle': dict(zip(self.fine_style_layers, fine_grams)),
            'cstyle': dict(zip(self.coarse_style_layers, coarse_grams)),
        }

# define the extractor model
# (module-level instance; build_dual_image calls it for the style, content and combo images)
dual_extractor = DualNeuralStyleTransferModel(fine_style_layers, coarse_style_layers, content_layers)

In [None]:
def update_weight_dictionary(outs,weight_dict,var_factor,regulator,meanofall):
    """Add one normalisation weight per entry of `outs` to `weight_dict` (in place).

    Args:
        outs: mapping from layer name to an object exposing `.numpy()`.
        weight_dict: dictionary that receives `name -> weight`; existing
            entries with the same name are overwritten.
        var_factor: value every weight is divided by.
        regulator: numerator used instead when a layer's weight is exactly 0.
        meanofall: if false, each layer gets
            `mean(layer)**2 * size(layer) / var_factor`; if true, the layer's
            own mean is replaced by the average of all layer means in `outs`.

    Returns:
        None; `weight_dict` is mutated.
    """
    net_mean = 0
    n_outputs = 0
    sizes = {}
    for name, output in sorted(outs.items()):
        # convert once instead of calling .numpy() repeatedly
        values = output.numpy()
        n_c = values.size
        layer_mean = values.mean()
        sizes[name] = n_c
        net_mean += layer_mean
        n_outputs += 1
        weight = layer_mean**2 * n_c / var_factor
        if weight == 0:
            # avoid a zero weight, which is later used as a divisor
            weight = regulator / var_factor
        weight_dict[name] = weight
    if meanofall:
        # (net_mean / n_outputs)**2 is the squared average of the layer means;
        # the original referenced the undefined name `netmean` here
        for name, n_c in sizes.items():
            weight_dict[name] = net_mean**2 * n_c / (var_factor * n_outputs**2)

def get_mean_weights_dual(fs_out, cs_out, c_out, combo_image, regulator=0.1,meanofall=False):
    """Build the dictionary of per-layer loss normalisation weights.

    The variational loss of `combo_image`, divided by the product of its last
    three dimensions (plus a small epsilon), is the reference against which
    all layer weights are normalised. The fine style, coarse style and content
    outputs are then each passed to `update_weight_dictionary`.

    Returns:
        A dict holding "variation": 1 plus one entry per layer name.
    """
    # number of elements in one image (last three dimensions)
    dims = tf.shape(combo_image)
    dof = np.float32(dims[-3] * dims[-2] * dims[-1])
    # this is the factor against which all others will be normalized
    var_factor = variational_loss(combo_image) / dof + 1e-8

    weight_dict = {"variation": 1}
    # fine style, coarse style and content weights, in that order
    for layer_outputs in (fs_out, cs_out, c_out):
        update_weight_dictionary(layer_outputs, weight_dict, var_factor, regulator, meanofall)
    return weight_dict

# this is our master function: pass it a content image and two style images, and enjoy
def build_dual_image(content_path,fine_style_path,coarse_style_path, weights = None, powers = None, abort_thresh = 0.0001,
                beta1 = 0.99, beta2 = 0.999, lr = 0.01, epochs=10, useMeanWeighting = False, contentStart = True):
    """Optimise an image that combines one content image with two style images.

    Args:
        content_path: path of the content image; it also fixes the output size.
        fine_style_path: path of the image supplying the fine style layers.
        coarse_style_path: path of the image supplying the coarse style layers.
        weights: optional sequence [c_w, fs_w, cs_w, v_w] multiplying the
            content, fine style, coarse style and variational terms
            (all 1 when omitted).
        powers: optional sequence [c_p, fs_p, cs_p, v_p] of exponents applied
            to the same four terms before weighting (all 1 when omitted).
        abort_thresh: stop after an epoch in which the squared change of the
            image, divided by the number of image elements, falls below this.
        beta1, beta2, lr: Adam hyperparameters.
        epochs: maximum number of epochs; each epoch runs 50 optimisation steps.
        useMeanWeighting: passed on as `meanofall` to `get_mean_weights_dual`.
        contentStart: start from the content image; if False, start from
            uniform random noise instead.

    Returns:
        (combo_image, losses): the optimised image as a `tf.Variable` and the
        list of loss values returned by every training step.
    """
    c_w,fs_w,cs_w,v_w = 1,1,1,1
    c_p,fs_p,cs_p,v_p = 1,1,1,1
    if weights is not None:
        c_w,fs_w,cs_w,v_w = weights
    if powers is not None:
        c_p,fs_p,cs_p,v_p = powers

    content_image = load_img_x(content_path)
    xshape = tf.shape(content_image)
    # number of elements in one image, used to normalise pixel-space losses
    dof = np.float32(xshape[-3]*xshape[-2]*xshape[-1])
    # both style images are resized to the content image's height and width
    target_size = (xshape[1],xshape[2])
    fstyle_image = tf.image.resize(load_img_x(fine_style_path),target_size)
    cstyle_image = tf.image.resize(load_img_x(coarse_style_path),target_size)

    # Extract base image vgg outputs only once - they won't change
    fstyle_outs = dual_extractor(fstyle_image)['fstyle']
    cstyle_outs = dual_extractor(cstyle_image)['cstyle']
    content_outs = dual_extractor(content_image)['content']

    # explicit comparison kept so that non-bool values behave as before
    if contentStart == False:
        combo_image = tf.Variable(tf.random.uniform(shape = xshape))
    else:
        combo_image = tf.Variable(content_image)

    # computed once; the flag selects per-layer or pooled mean weighting
    weight_dict = get_mean_weights_dual(fstyle_outs, cstyle_outs,content_outs,combo_image,
                                        meanofall=bool(useMeanWeighting))

    # note: insanely sensitive to hyperparameter choices
    # the original paper uses L-BFGS
    opt = tf.optimizers.Adam(learning_rate=lr, beta_1=beta1, beta_2=beta2, epsilon=1e-7)

    def total_loss(combo):
        """Weighted sum of fine style, coarse style, content and variational losses."""
        # using adam, this won't matter, if SGD, dial it
        scale = 1
        # get the style and content for the combined image
        outputs = dual_extractor(combo)
        fstyle_outputs = outputs['fstyle']
        cstyle_outputs = outputs['cstyle']
        content_outputs = outputs['content']

        # compute fine style loss on all fine style layers
        fstyle_loss = tf.add_n([mat_loss(fstyle_outputs[name],fstyle_outs[name], scale=weight_dict[name])
                                for name in fstyle_outputs.keys()])
        # compute coarse style loss on all coarse style layers
        cstyle_loss = tf.add_n([mat_loss(cstyle_outputs[name],cstyle_outs[name], scale=weight_dict[name])
                                for name in cstyle_outputs.keys()])
        # compute content loss on all content layers
        content_loss = tf.add_n([mat_loss(content_outputs[name],content_outs[name], scale=weight_dict[name])
                                    for name in content_outputs.keys()])
        # compute variational loss on the combo image
        var_loss = variational_loss(combo) / dof
        # join all losses using specified weights and powers
        loss = fs_w * (fstyle_loss ** fs_p) + cs_w * (cstyle_loss ** cs_p) + c_w * (content_loss ** c_p) + v_w * (var_loss ** v_p)
        return scale * loss

    #@tf.function()
    def train_step(im):
        """Run one optimiser step on `im` in place and return the loss before the step."""
        # GradientTape context tracks computations for gradient back-prop
        with tf.GradientTape() as tape:
            loss = total_loss(im)
        grad = tape.gradient(loss, im)
        # using Adam here (perhaps recklessly)
        opt.apply_gradients([(grad, im)])
        # clip back to [0-1] range
        im.assign(clip_f(im))
        return loss

    start = time.time()
    steps_per_epoch = 50
    losses = []
    # copy old combo image for early loop exit
    oldim = tf.Variable(combo_image)
    # epoch loop
    for _epoch in range(epochs):
        for _ in range(steps_per_epoch):
            losses.append(train_step(combo_image))
            print(".", end='')
        # clear output and display the current image
        display.clear_output(wait=True)
        display.display(tensor_to_image(combo_image))
        # if image is hardly changing, break loop (abort_thresh ~ 1e-4 is good)
        # if using a low lr, decrease or set to zero
        if mat_loss(oldim,combo_image)/dof < abort_thresh:
            break
        # snapshot for the next epoch's comparison
        oldim = tf.Variable(combo_image)
    end = time.time()
    print("Total time: {:.1f}".format(end-start))
    return combo_image, losses

# Generation

lr, beta1, and the weights and powers (fs, cs, c, v with suffix _w or _p) are the important parameters  
This is a bit more finicky than the single transfer - some combinations don't work well; also, what we think of as fine (colors, small patterns) vs coarse (shapes, larger patterns) doesn't always behave as expected   
Basic plan:  
1) leave lr / beta_1 at defaults, increasing lr or beta_1 can blur out patches, takes longer if decreased  
2) leave v_w and v_p at 1 unless image is too blurry or pixelated (if pixelated, up to 10, if still bad, this is likely a consequence of the pairing)  
3) c_w = 0, c_p = 1  
4) fs_p, cs_p = 1  
5) fs_w, cs_w = 1, if we want to in/decrease coarse vs fine, we can move these up or down

In [None]:
# content image plus a catalogue of candidate style images
content_path = IMAGEDIR + 'Bacchus.jpg'
style_path = IMAGEDIR + 'Gleizes_The_Bridges_of_Paris.jpg'
style_path_0 = IMAGEDIR + 'Blue_Water_Lilies_Monet.jpg'
style_path_1 = IMAGEDIR + 'Delaunay_Window_on_the_City.jpg'
style_path_2 = IMAGEDIR + 'Christ_in_Limbo.jpg'
style_path_3 = IMAGEDIR + 'Kandinsky_Composition_7.jpg'
style_path_4 = IMAGEDIR + 'Babel_Bruegel.jpg'
style_path_5 = IMAGEDIR + 'Wreckers_Coast_of_Northumberland_JMWTurner.jpg'
style_path_6 = IMAGEDIR + 'Water_Lily_Pond_Monet.jpg'
style_path_7 = IMAGEDIR + 'Tondals_Vision.jpg'
style_path_8 = IMAGEDIR + 'The_Triumph_of_Death_Bruegel.jpg'
style_path_9 = IMAGEDIR + 'Metzinger_Two_Nudes.jpg'

# loss weights and powers in the order content, fine style, coarse style, variation
[c_w,fs_w,cs_w,v_w] = [0,1,1,1]
[c_p,fs_p,cs_p,v_p] = [1,1,1,1]
# Adam settings and maximum number of epochs
[lr,beta1,beta2,epochs]=[0.02,0.98,0.9999,15]
# this run: style_path_4 supplies the fine style, style_path_2 the coarse style
newim, losses = build_dual_image(content_path,style_path_4,style_path_2,
                                 lr=lr,beta1=beta1,beta2=beta2,epochs=epochs,
                                 weights=[c_w,fs_w,cs_w,v_w],powers=[c_p,fs_p,cs_p,v_p])

In [None]:
filename = 'Bacchus_Babel_Limbo.jpg'
# create the output directory if it is missing, so the save does not fail
# with a "no such file or directory" error on a fresh checkout
os.makedirs(OUTDIR, exist_ok=True)
# newim carries a leading batch axis; save the single image it contains
plt.imsave(os.path.join(OUTDIR, filename), np.array(newim[0]))