In [1]:
!git clone https://github.com/dsloet/StyleTransfer.git

Cloning into 'StyleTransfer'...
remote: Enumerating objects: 38, done.[K
remote: Counting objects: 100% (38/38), done.[K
remote: Compressing objects: 100% (34/34), done.[K
remote: Total 38 (delta 10), reused 19 (delta 3), pack-reused 0[K
Unpacking objects: 100% (38/38), done.


In [2]:
%cd StyleTransfer/

/content/StyleTransfer


In [3]:
%ls

[0m[01;34mimages[0m/  README.md  requirements.txt  [01;34mstyle_keras[0m/  [01;34mstyle_tensorflow[0m/


In [0]:
%mkdir style_keras/output_images

In [5]:
%tensorflow_version 1.x
from style_keras.StyleTransferKeras import content_loss, gram_matrix, style_loss, total_variation_loss
from style_keras.StyleTransferKeras import test_content_loss
from style_keras.utils_keras import preprocess_image, deprocess_image

Using TensorFlow backend.


In [6]:
test_content_loss()





Result:  605.6219
Expected result:  605.6219


In [0]:
import time
import numpy as np

from keras import backend as K
from keras.applications import vgg19
from keras.preprocessing.image import load_img

#from scipy.misc import imsave
import imageio
from scipy.optimize import fmin_l_bfgs_b


In [0]:
def style_transfer(base_img_path, style_img_path, output_img_path, convnet='vgg19', 
        content_weight=3e-2, style_weights=(20000, 500, 12, 1, 1), tv_weight=5e-2, content_layer='block4_conv2', 
        style_layers=['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1'], iterations=50):
    
    print('\nInitializing Neural Style model...')

    # Determine the image sizes. Fix the output size from the content image.
    print('\n\tResizing images...')
    width, height = load_img(base_img_path).size
    new_dims = (height, width)

    # Preprocess content and style images. Resizes the style image if needed.
    content_img = K.variable(preprocess_image(base_img_path, new_dims))
    style_img = K.variable(preprocess_image(style_img_path, new_dims))

    # Create an output placeholder with desired shape.
    # It will correspond to the generated image after minimizing the loss function.
    output_img = K.placeholder((1, height, width, 3))
    
    # Sanity check on dimensions
    print("\tSize of content image is: {}".format(K.int_shape(content_img)))
    print("\tSize of style image is: {}".format(K.int_shape(style_img)))
    print("\tSize of output image is: {}".format(K.int_shape(output_img)))

    # Combine the 3 images into a single Keras tensor, for ease of manipulation
    # The first dimension of a tensor identifies the example/input.
    input_img = K.concatenate([content_img, style_img, output_img], axis=0)

    # Initialize the vgg16 model
    print('\tLoading {} model'.format(convnet.upper()))

    if convnet == 'vgg16':
        model = vgg16.VGG16(input_tensor=input_img, weights='imagenet', include_top=False)
    else:
        model = vgg19.VGG19(input_tensor=input_img, weights='imagenet', include_top=False)
        
    print('\tComputing losses...')
    # Get the symbolic outputs of each "key" layer (they have unique names).
    # The dictionary outputs an evaluation when the model is fed an input.
    outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])

    # Extract features from the content layer
    content_features = outputs_dict[content_layer]

    # Extract the activations of the base image and the output image
    base_image_features = content_features[0, :, :, :]  # 0 corresponds to base
    combination_features = content_features[2, :, :, :] # 2 coresponds to output

    # Calculate the feature reconstruction loss
    contnt_loss = content_weight * content_loss(base_image_features, combination_features)

    # For each style layer compute style loss
    # The total style loss is the weighted sum of those losses
    temp_style_loss = K.variable(0.0)       # we update this variable in the loop
    weight = 1.0 / float(len(style_layers))
    
    for i, layer in enumerate(style_layers):
        # extract features of given layer
        style_features = outputs_dict[layer]
        # from those features, extract style and output activations
        style_image_features = style_features[1, :, :, :]   # 1 corresponds to style image
        output_style_features = style_features[2, :, :, :]  # 2 coresponds to generated image
        temp_style_loss += style_weights[i] * weight * \
                    style_loss(style_image_features, output_style_features)
    styleloss = temp_style_loss

    # Compute total variational loss.
    tv_loss = tv_weight * total_variation_loss(output_img)

    # Composite loss
    total_loss = contnt_loss + styleloss + tv_loss
    
    # Compute gradients of output img with respect to total_loss
    print('\tComputing gradients...')
    grads = K.gradients(total_loss, output_img)
    
    outputs = [total_loss] + grads
    loss_and_grads = K.function([output_img], outputs)  
    
    # Initialize the generated image from random noise
    x = np.random.uniform(0, 255, (1, height, width, 3)) - 128.
    
    # Loss function that takes a vectorized input image, for the solver
    def loss(x):
        x = x.reshape((1, height, width, 3))   # reshape
        return loss_and_grads([x])[0]
    
    # Gradient function that takes a vectorized input image, for the solver
    def grads(x):
        x = x.reshape((1, height, width, 3))   # reshape
        return loss_and_grads([x])[1].flatten().astype('float64')
    
    # Fit over the total iterations
    for i in range(iterations+1):
        print('\n\tIteration: {}'.format(i+1))

        toc = time.time()
        x, min_val, info = fmin_l_bfgs_b(loss, x.flatten(), fprime=grads, maxfun=20)

        # save current generated image
        if i%10 == 0:
            img = deprocess_image(x.copy(), height, width)
            fname = output_img_path + '_at_iteration_%d.png' % (i)
            imageio.imsave(fname, img)
            print('\t\tImage saved as', fname)

        tic = time.time()

        print('\t\tLoss: {:.2e}, Time: {} seconds'.format(float(min_val), float(tic-toc)))

In [11]:
params = {
'base_img_path' : 'images/dancing.jpg', 
'style_img_path' : 'images/picasso.jpg', 
'output_img_path' : 'style_keras/output_images/dance_picasso2', 
'convnet' : 'vgg19', 
'content_weight' : 5, 
'style_weights' : (10, 10, 50, 10, 10),
'tv_weight' : 200, 
'content_layer' : 'block3_conv3', 
'style_layers' : ['block1_conv1',
                  'block2_conv2',
                  'block4_conv4'], 
'iterations' : 50
}

style_transfer(**params)


Initializing Neural Style model...

	Resizing images...
	Size of content image is: (1, 444, 444, 3)
	Size of style image is: (1, 444, 444, 3)
	Size of output image is: (1, 444, 444, 3)
	Loading VGG19 model
	Computing losses...
	Computing gradients...

	Iteration: 1
		Image saved as style_keras/output_images/dance_picasso2_at_iteration_0.png
		Loss: 4.41e+12, Time: 9.91310167312622 seconds

	Iteration: 2
		Loss: 2.84e+12, Time: 9.769203662872314 seconds

	Iteration: 3
		Loss: 2.03e+12, Time: 9.958154678344727 seconds

	Iteration: 4
		Loss: 1.59e+12, Time: 9.921135902404785 seconds

	Iteration: 5
		Loss: 1.31e+12, Time: 9.873176336288452 seconds

	Iteration: 6
		Loss: 1.13e+12, Time: 9.862599849700928 seconds

	Iteration: 7
		Loss: 9.83e+11, Time: 9.895923376083374 seconds

	Iteration: 8
		Loss: 8.92e+11, Time: 9.833572387695312 seconds

	Iteration: 9
		Loss: 8.32e+11, Time: 9.755945920944214 seconds

	Iteration: 10
		Loss: 7.82e+11, Time: 9.925970792770386 seconds

	Iteration: 11
		Ima

KeyboardInterrupt: ignored

In [9]:
params = {
'base_img_path' : 'images/dancing.jpg', 
'style_img_path' : 'images/picasso.jpg', 
'output_img_path' : 'style_keras/output_images/dance_picasso', 
'convnet' : 'vgg19', 
'content_weight' : 500, 
'style_weights' : (10, 10, 50, 10, 10),
'tv_weight' : 200, 
'content_layer' : 'block4_conv2', 
'style_layers' : ['block1_conv1',
                  'block2_conv1',
                  'block3_conv1', 
                  'block4_conv1', 
                  'block5_conv1'], 
'iterations' : 50
}

style_transfer(**params)


Initializing Neural Style model...

	Resizing images...

	Size of content image is: (1, 444, 444, 3)
	Size of style image is: (1, 444, 444, 3)
	Size of output image is: (1, 444, 444, 3)
	Loading VGG19 model



Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5


	Computing losses...
	Computing gradients...

	Iteration: 1
		Image saved as style_keras/output_images/dance_picasso_at_iteration_0.png
		Loss: 9.69e+14, Time: 19.76012396812439 seconds

	Iteration: 2
		Loss: 4.25e+14, Time: 10.063912868499756 seconds

	Iteration: 3
		Loss: 2.98e+14, Time: 10.210342168807983 seconds

	Iteration: 4
		Loss: 2.40e+14, Time: 10.213589429855347 seconds

	Iteration: 5
		Loss: 2.05e+14, Time: 10.241263628005981 seconds

	Iteration: 6
		Loss: 1.82e+14, Time: 10.46665644645691 seconds

	Iteration: 7
		Loss: 1.64e+14, Time: 10.527631282806396 seconds

	Iteration: 8
		Loss: 1.51e+14, Time: 10.521090269088745 seco

KeyboardInterrupt: ignored