In [6]:
"""
Implementation of the "A Neural Algorithm of Artistic Style" in Python using
TensorFlow.
"""
import os
import sys
import numpy as np
import scipy.io
import imageio
import scipy.misc
import tensorflow as tf

###############################################################################
# Constants for the image input and output.
###############################################################################

# network's content loss: first one layer of conv, mixed, etc. (block)
# network's style loss: 4 to backward layer


# Output folder for the generated images.
OUTPUT_DIR = 'output_face/'
# Content image to use (loaded as `content_image` in the main block).
# Commented-out alternatives kept for reference:
#   CONTENT_IMAGE = 'Macau.jpg'
#   CONTENT_IMAGE = 'images/hong_kong_2.jpg'
CONTENT_IMAGE = 's.PNG'
# Style image to use (loaded as `style_image` in the main block).
# Commented-out alternatives kept for reference:
#   STYLE_IMAGE = 'StarryNight.jpg'
#   STYLE_IMAGE = 'images/starry_night.jpg'
STYLE_IMAGE = 'c.PNG'
# Image dimension constants, derived from the content image on disk.
from PIL import Image


def _flatten_to_rgb(image, size=None):
    """
    Return `image` as a 3-channel RGB PIL image, optionally resized first.

    RGB input is returned untouched (the very same object when `size` is
    None). RGBA input is composited onto a white background. Any other mode
    (palette, grayscale, ...) is first converted to RGBA when it carries
    transparency, otherwise to RGB, so the rest of the pipeline can rely on
    exactly three colour channels.

    Args:
        image: an open PIL image.
        size: optional (width, height) to resize to before flattening.

    Returns:
        A PIL image in mode "RGB".
    """
    if image.mode not in ("RGB", "RGBA"):
        has_alpha = (image.mode in ("LA", "PA")
                     or "transparency" in image.info)
        image = image.convert("RGBA" if has_alpha else "RGB")
    if size is not None:
        # LANCZOS is the filter formerly exposed as Image.ANTIALIAS, which
        # was removed in Pillow 10.
        image = image.resize(size, Image.LANCZOS)
    if image.mode == "RGBA":
        background = Image.new("RGB", image.size, (255, 255, 255))
        background.paste(image, mask=image.split()[3]) # 3 is the alpha channel
        return background
    return image


# The content image defines the working resolution. It is rewritten in place
# only when it had to be converted to RGB.
with Image.open(CONTENT_IMAGE) as im:
    print(im.mode)
    width, height = im.size
    content_rgb = _flatten_to_rgb(im)
    content_needs_save = content_rgb is not im

# Save after the source file has been closed.
if content_needs_save:
    content_rgb.save(CONTENT_IMAGE)

IMAGE_WIDTH = width
IMAGE_HEIGHT = height
COLOR_CHANNELS = 3

print(IMAGE_WIDTH, IMAGE_HEIGHT)

# The style image is always resized to the content resolution, flattened to
# RGB, and written back over the original file.
with Image.open(STYLE_IMAGE) as style_source:
    im = _flatten_to_rgb(style_source, size=(width, height))
im.save(STYLE_IMAGE)

###############################################################################
# Algorithm constants
###############################################################################
# Weight of the random noise when it is blended with the content image to
# form the starting image (see generate_noise_image); the content image gets
# the remaining 1 - NOISE_RATIO.
NOISE_RATIO = 0.6
# Number of optimizer steps to run.
ITERATIONS = 500
# Multiplier applied to the content loss in the total loss (default 5).
# NOTE(review): the paper appears to name these the other way round
# (alpha for content, beta for style) - confirm before renaming.
BETA = 5
# Multiplier applied to the style loss in the total loss (default 100).
ALPHA = 100
# Path to the pretrained model weights. The file is more than 500MB, so it is
# not included in the repository; models are available from the model Zoo:
# Link: https://github.com/BVLC/caffe/wiki/Model-Zoo
#
# The VGG 19-layer model from the paper "Very Deep Convolutional Networks for
# Large-Scale Image Recognition" was used previously:
#   VGG_MODEL = 'imagenet-vgg-verydeep-19.mat'
VGG_MODEL = 'vgg-face.mat'
# Per-channel mean subtracted from every input image and added back on save,
# shaped (1, 1, 1, 3) so it broadcasts over a batch of images.
# NOTE(review): these look like the means of the ImageNet VGG model, while
# VGG_MODEL now points at 'vgg-face.mat'; load_vgg_model reads that file's
# own averageImage but never uses it - confirm which mean is intended.
MEAN_VALUES = np.array(
    [123.68, 116.779, 103.939])[np.newaxis, np.newaxis, np.newaxis, :]

def generate_noise_image(content_image, noise_ratio = NOISE_RATIO):
    """
    Blend uniform random noise with the content image.

    Args:
        content_image: array that broadcasts against the shape
            (1, IMAGE_HEIGHT, IMAGE_WIDTH, COLOR_CHANNELS).
        noise_ratio: weight given to the noise; the content image is weighted
            by (1 - noise_ratio).

    Returns:
        The weighted sum of the noise and the content image.
    """
    noise_shape = (1, IMAGE_HEIGHT, IMAGE_WIDTH, COLOR_CHANNELS)
    # Noise values are drawn uniformly from [-20, 20).
    white_noise = np.random.uniform(-20, 20, noise_shape).astype('float32')
    content_weight = 1 - noise_ratio
    return white_noise * noise_ratio + content_image * content_weight

def load_image(path):
    """
    Read an image file and prepare it as network input.

    A leading batch axis of length 1 is added and MEAN_VALUES is subtracted.

    Args:
        path: location of the image file.

    Returns:
        The mean-subtracted image with one extra leading dimension.
    """
    pixels = imageio.imread(path)
    # Pixel data is unchanged; only a batch dimension is prepended.
    batched = np.expand_dims(pixels, axis=0)
    return batched - MEAN_VALUES

def save_image(path, image):
    """
    Write a network-space image to disk.

    Undoes the preprocessing of load_image: adds MEAN_VALUES back, drops the
    leading batch axis, clips to [0, 255] and casts to uint8 before saving.

    Args:
        path: destination file path.
        image: array with a leading batch axis; only element 0 is saved.
    """
    restored = (image + MEAN_VALUES)[0]
    pixels = np.clip(restored, 0, 255).astype('uint8')
    imageio.imsave(path, pixels)

def load_inception_model(frozen_graph_filename):
    """
    Load a frozen TensorFlow graph and index its operations by name.

    The protobuf file is parsed into a GraphDef and imported into a fresh
    tf.Graph under the name scope "prefix". Every operation name is printed.

    Args:
        frozen_graph_filename: path to the serialized GraphDef (.pb) file.

    Returns:
        dict with an 'input' entry holding a new float32 tf.Variable of zeros
        shaped (1, IMAGE_HEIGHT, IMAGE_WIDTH, COLOR_CHANNELS), plus one entry
        per imported operation, keyed by the operation's name.
    """
    # Deserialize the GraphDef from disk.
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(frozen_graph_filename, "rb") as pb_file:
        graph_def.ParseFromString(pb_file.read())

    # Import into a dedicated graph so the op names carry the "prefix" scope.
    imported_graph = tf.Graph()
    with imported_graph.as_default():
        tf.import_graph_def(graph_def, name="prefix")

    network = {
        'input': tf.Variable(
            np.zeros((1, IMAGE_HEIGHT, IMAGE_WIDTH, COLOR_CHANNELS)),
            dtype = 'float32'),
    }
    for operation in imported_graph.get_operations():
        print(operation.name)
        network[operation.name] = operation
    return network

def load_vgg_model(param_path):
    """
    Build a TensorFlow graph from the layer description stored in a .mat file.

    The file is read with scipy.io.loadmat and is expected to contain a
    'meta' entry (classes, normalization) and a 'layers' entry
    (presumably a MatConvNet export - TODO confirm). Layers of type 'conv',
    'relu', 'pool' and 'softmax' are chained onto a trainable input variable;
    the name of every layer is printed as it is processed.

    Args:
        param_path: path to the .mat parameter file.

    Returns:
        dict mapping 'input' to a float32 tf.Variable of zeros shaped
        (1, IMAGE_HEIGHT, IMAGE_WIDTH, COLOR_CHANNELS), and each layer name to
        the tensor produced after that layer.
    """
    data = scipy.io.loadmat(param_path)

    # Read the meta information.
    meta = data['meta']
    classes = meta['classes']
    # Only the number of class names is used below (softmax reshape).
    class_names = classes[0][0]['description'][0][0]
    normalization = meta['normalization']
    # NOTE(review): average_image and image_size are read here but not used
    # anywhere else in this function; preprocessing uses MEAN_VALUES instead.
    average_image = np.squeeze(normalization[0][0]['averageImage'][0][0][0][0])
    image_size = np.squeeze(normalization[0][0]['imageSize'][0][0])
    # Disabled: resizing the input to the model's native image size.
    #input_maps = tf.image.resize_images(input_maps, image_size[0], image_size[1])
    
    # Read the layer information.
    layers = data['layers']
    network = {}
    # The input is a Variable so it can be assigned an image and optimized.
    network['input']   = tf.Variable(np.zeros((1, IMAGE_HEIGHT, IMAGE_WIDTH, COLOR_CHANNELS)), dtype = 'float32')
    # `current` always holds the output of the most recently built layer.
    current = network['input']
    for layer in layers[0]:
        name = layer[0]['name'][0][0]
        layer_type = layer[0]['type'][0][0]
        if layer_type == 'conv':
            # Layers whose name starts with 'fc' are built as unpadded
            # convolutions; all other conv layers keep the spatial size.
            if name[:2] == 'fc':
                padding = 'VALID'
            else:
                padding = 'SAME'
            stride = layer[0]['stride'][0][0]
            kernel, bias = layer[0]['weights'][0][0]
            # Disabled: swapping the first two kernel axes.
            # kernel = np.transpose(kernel, (1, 0, 2, 3))
            bias = np.squeeze(bias).reshape(-1)
            # NOTE(review): stride[0] is used for both spatial dimensions.
            conv = tf.nn.conv2d(current, tf.constant(kernel),
                                strides=(1, stride[0], stride[0], 1), padding=padding)
            current = tf.nn.bias_add(conv, bias)
        elif layer_type == 'relu':
            current = tf.nn.relu(current)
        elif layer_type == 'pool':
            stride = layer[0]['stride'][0][0]
            pool = layer[0]['pool'][0][0]
            # Max pooling with the window taken from the file; as above,
            # stride[0] is applied to both spatial dimensions.
            current = tf.nn.max_pool(current, ksize=(1, pool[0], pool[1], 1),
                                     strides=(1, stride[0], stride[0], 1), padding='SAME')
        elif layer_type == 'softmax':
            # Flatten to one row per prediction, one column per class.
            current = tf.nn.softmax(tf.reshape(current, [-1, len(class_names)]))

        # Every layer is recorded, including types not handled above (for
        # those, the stored tensor is simply the previous layer's output).
        print("layer name : "+name)
        network[name] = current

    return network
    
'''
def load_vgg_model(path):
    """
    Returns a model for the purpose of 'painting' the picture.

    Takes only the convolution layer weights and wrap using the TensorFlow
    Conv2d, Relu and AveragePooling layer. VGG actually uses maxpool but
    the paper indicates that using AveragePooling yields better results.
    The last few fully connected layers are not used.

    Here is the detailed configuration of the VGG model:

        0 is conv1_1 (3, 3, 3, 64)
        1 is relu
        2 is conv1_2 (3, 3, 64, 64)
        3 is relu    
        4 is maxpool
        5 is conv2_1 (3, 3, 64, 128)
        6 is relu
        7 is conv2_2 (3, 3, 128, 128)
        8 is relu
        9 is maxpool
        10 is conv3_1 (3, 3, 128, 256)
        11 is relu
        12 is conv3_2 (3, 3, 256, 256)
        13 is relu
        14 is conv3_3 (3, 3, 256, 256)
        15 is relu
        16 is conv3_4 (3, 3, 256, 256)
        17 is relu
        18 is maxpool
        19 is conv4_1 (3, 3, 256, 512)
        20 is relu
        21 is conv4_2 (3, 3, 512, 512)
        22 is relu
        23 is conv4_3 (3, 3, 512, 512)
        24 is relu
        25 is conv4_4 (3, 3, 512, 512)
        26 is relu
        27 is maxpool
        28 is conv5_1 (3, 3, 512, 512)
        29 is relu
        30 is conv5_2 (3, 3, 512, 512)
        31 is relu
        32 is conv5_3 (3, 3, 512, 512)
        33 is relu
        34 is conv5_4 (3, 3, 512, 512)
        35 is relu
        36 is maxpool
        37 is fullyconnected (7, 7, 512, 4096)
        38 is relu
        39 is fullyconnected (1, 1, 4096, 4096)
        40 is relu
        41 is fullyconnected (1, 1, 4096, 1000)
        42 is softmax
    """
    vgg = scipy.io.loadmat(path)

    vgg_layers = vgg['layers']
    def _weights(layer, expected_layer_name):
        """
        Return the weights and bias from the VGG model for a given layer.
        """
        W = vgg_layers[0][layer][0][0][0][0][0]
        b = vgg_layers[0][layer][0][0][0][0][1]
        layer_name = vgg_layers[0][layer][0][0][-2]
        #assert layer_name == expected_layer_name
        return W, b

    def _relu(conv2d_layer):
        """
        Return the RELU function wrapped over a TensorFlow layer. Expects a
        Conv2d layer input.
        """
        return tf.nn.relu(conv2d_layer)

    def _conv2d(prev_layer, layer, layer_name):
        """
        Return the Conv2D layer using the weights, biases from the VGG
        model at 'layer'.
        """
        W, b = _weights(layer, layer_name)
        W = tf.constant(W)
        b = tf.constant(np.reshape(b, (b.size)))
        return tf.nn.conv2d(
            prev_layer, filter=W, strides=[1, 1, 1, 1], padding='SAME') + b

    def _conv2d_relu(prev_layer, layer, layer_name):
        """
        Return the Conv2D + RELU layer using the weights, biases from the VGG
        model at 'layer'.
        """
        return _relu(_conv2d(prev_layer, layer, layer_name))

    def _avgpool(prev_layer):
        """
        Return the AveragePooling layer.
        """
        return tf.nn.avg_pool(prev_layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Constructs the graph model.
    graph = {}
    graph['input']   = tf.Variable(np.zeros((1, IMAGE_HEIGHT, IMAGE_WIDTH, COLOR_CHANNELS)), dtype = 'float32')
    graph['conv1_1']  = _conv2d_relu(graph['input'], 0, 'conv1_1')
    graph['conv1_2']  = _conv2d_relu(graph['conv1_1'], 2, 'conv1_2')
    graph['avgpool1'] = _avgpool(graph['conv1_2'])
    graph['conv2_1']  = _conv2d_relu(graph['avgpool1'], 5, 'conv2_1')
    graph['conv2_2']  = _conv2d_relu(graph['conv2_1'], 7, 'conv2_2')
    graph['avgpool2'] = _avgpool(graph['conv2_2'])
    graph['conv3_1']  = _conv2d_relu(graph['avgpool2'], 10, 'conv3_1')
    graph['conv3_2']  = _conv2d_relu(graph['conv3_1'], 12, 'conv3_2')
    graph['conv3_3']  = _conv2d_relu(graph['conv3_2'], 14, 'conv3_3')
    graph['conv3_4']  = _conv2d_relu(graph['conv3_3'], 16, 'conv3_4')
    graph['avgpool3'] = _avgpool(graph['conv3_4'])
    graph['conv4_1']  = _conv2d_relu(graph['avgpool3'], 19, 'conv4_1')
    graph['conv4_2']  = _conv2d_relu(graph['conv4_1'], 21, 'conv4_2')
    graph['conv4_3']  = _conv2d_relu(graph['conv4_2'], 23, 'conv4_3')
    graph['conv4_4']  = _conv2d_relu(graph['conv4_3'], 25, 'conv4_4')
    graph['avgpool4'] = _avgpool(graph['conv4_4'])
    graph['conv5_1']  = _conv2d_relu(graph['avgpool4'], 28, 'conv5_1')
    graph['conv5_2']  = _conv2d_relu(graph['conv5_1'], 30, 'conv5_2')
    graph['conv5_3']  = _conv2d_relu(graph['conv5_2'], 32, 'conv5_3')
    graph['conv5_4']  = _conv2d_relu(graph['conv5_3'], 34, 'conv5_4')
    graph['avgpool5'] = _avgpool(graph['conv5_4'])
    return graph

FOR VGG 19 IMAGENET
'''
def content_loss_func(sess, model):
    """
    Build the content loss for layer 'conv4_2'.

    The layer is evaluated once with the session to capture the current
    activations as a fixed target; the returned tensor measures how far the
    layer's (symbolic) output is from that target.

    Args:
        sess: session used to evaluate the target activations.
        model: dict of layer tensors; must contain 'conv4_2'.

    Returns:
        A scalar tensor: sum of squared differences scaled by 1 / (4 * N * M).
    """
    layer = model['conv4_2']
    target = sess.run(layer)
    # N: number of filters at this layer.
    n_filters = target.shape[3]
    # M: height times width of the feature map at this layer.
    map_size = target.shape[1] * target.shape[2]
    # The paper uses 0.5 * sum((x - p)^2). That form was found to be very
    # slow in "painting", so the normalization constant of the style loss is
    # reused here instead.
    scale = 1 / (4 * n_filters * map_size)
    squared_error = tf.reduce_sum(tf.pow(layer - target, 2))
    return scale * squared_error

def inception_content_loss_func(sess, model):
    """
    Content loss entry point reserved for the Inception network.

    The previous body was a line-for-line copy of content_loss_func, so this
    now delegates to it; the result is unchanged.

    NOTE(review): the shared implementation reads model['conv4_2'].
    Dictionaries built by load_inception_model are keyed by operation name
    (e.g. 'prefix/InceptionResnetV1/...'), so a different layer key will be
    needed before this can be used with that model.

    Args:
        sess: session used to evaluate the target activations.
        model: dict of layer tensors; must contain 'conv4_2'.

    Returns:
        The scalar content-loss tensor produced by content_loss_func.
    """
    return content_loss_func(sess, model)


def style_loss_func(sess, model):
    """
    Build the style loss as a weighted sum over several layers.

    For each listed layer the current activations are evaluated with the
    session and used as the fixed style target; the per-layer loss compares
    the Gram matrix of that target with the Gram matrix of the layer's
    symbolic output.

    Args:
        sess: session used to evaluate the target activations.
        model: dict of layer tensors containing every layer listed below.

    Returns:
        A scalar tensor holding the weighted sum of the per-layer losses.
    """
    def _gram(features, n_filters, map_size):
        """
        Gram matrix of `features` after flattening to (map_size, n_filters).
        """
        flat = tf.reshape(features, (map_size, n_filters))
        return tf.matmul(tf.transpose(flat), flat)

    def _layer_loss(style_features, generated):
        """
        Style loss of one layer: scaled squared distance of Gram matrices.
        """
        # N: number of filters at this layer.
        n_filters = style_features.shape[3]
        # M: height times width of the feature map at this layer.
        map_size = style_features.shape[1] * style_features.shape[2]
        # Style representation of the target, then of the generated image.
        style_gram = _gram(style_features, n_filters, map_size)
        generated_gram = _gram(generated, n_filters, map_size)
        scale = 1 / (4 * n_filters**2 * map_size**2)
        return scale * tf.reduce_sum(tf.pow(generated_gram - style_gram, 2))

    # Layers and their weights. Raising the weight of the higher layers
    # (conv5_1) and lowering the lower ones (conv1_1) gives softer features;
    # doing the opposite gives harder features.
    layer_weights = (
        ('conv1_1', 0.5),
        ('conv2_1', 1.0),
        ('conv3_1', 1.5),
        ('conv4_1', 3.0),
        ('conv5_1', 4.0),
    )

    # Build every per-layer loss first, then weight and add them up.
    layer_errors = []
    for layer_name, _ in layer_weights:
        layer_errors.append(
            _layer_loss(sess.run(model[layer_name]), model[layer_name]))
    weighted = [weight * error
                for (_, weight), error in zip(layer_weights, layer_errors)]
    return sum(weighted)


if __name__ == '__main__':
    # Create the output folder once, before any work is done, instead of
    # re-checking it on every reporting step.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    with tf.Session() as sess:
        # Load the images.
        content_image = load_image(CONTENT_IMAGE)
        style_image = load_image(STYLE_IMAGE)

        # Temporary exploration: list the operations of the Inception graph.
        # Its result is not used below, so a missing file must not abort the
        # style transfer.
        print('temp load of inception')
        inception_path = './20180402-114759/20180402-114759.pb'
        if os.path.exists(inception_path):
            load_inception_model(inception_path)
        else:
            print('inception graph not found, skipping: ' + inception_path)
        print('end of fucking thing dude')

        # Load the model.
        print('start model loding')
        model = load_vgg_model(VGG_MODEL)
        print('end model loding')

        # Generate the white noise and content presentation mixed image
        # which will be the basis for the algorithm to "paint".
        input_image = generate_noise_image(content_image)

        print('start model learn')

        sess.run(tf.global_variables_initializer())
        # Construct content_loss using content_image.
        sess.run(model['input'].assign(content_image))
        content_loss = content_loss_func(sess, model)

        # Construct style_loss using style_image.
        sess.run(model['input'].assign(style_image))
        style_loss = style_loss_func(sess, model)

        print('end model learn')

        # Instantiate equation 7 of the paper.
        total_loss = BETA * content_loss + ALPHA * style_loss

        # From the paper: jointly minimize the distance of a white noise image
        # from the content representation of the photograph in one layer of
        # the network and the style representation of the painting in a number
        # of layers of the CNN.
        #
        # The content is built from one layer, while the style is from five
        # layers. Then we minimize the total_loss, which is the equation 7.
        optimizer = tf.train.AdamOptimizer(2.0)
        train_step = optimizer.minimize(total_loss)

        # Initialize again so the variables created by the optimizer exist,
        # then load the starting image into the input variable.
        sess.run(tf.global_variables_initializer())
        sess.run(model['input'].assign(input_image))
        for it in range(ITERATIONS):
            sess.run(train_step)
            # Report every 100 iterations, and also after the final step so
            # the finished image is always written to disk.
            is_last_step = it == ITERATIONS - 1
            if it % 100 == 0 or is_last_step:
                mixed_image = sess.run(model['input'])
                print('Iteration %d' % (it))
                # Sum with NumPy: building a tf.reduce_sum here would add a
                # new op to the graph on every report.
                print('sum : ', np.sum(mixed_image))
                print('cost: ', sess.run(total_loss))

                filename = os.path.join(OUTPUT_DIR, '%d.png' % (it))
                save_image(filename, mixed_image)

RGB
263 290
temp load of inception
prefix/embeddings/Maximum/y
prefix/embeddings/Sum/reduction_indices
prefix/InceptionResnetV1/Bottleneck/BatchNorm/moving_variance
prefix/InceptionResnetV1/Bottleneck/BatchNorm/moving_variance/read
prefix/InceptionResnetV1/Bottleneck/BatchNorm/moving_mean
prefix/InceptionResnetV1/Bottleneck/BatchNorm/moving_mean/read
prefix/InceptionResnetV1/Bottleneck/BatchNorm/Const
prefix/InceptionResnetV1/Bottleneck/BatchNorm/beta
prefix/InceptionResnetV1/Bottleneck/BatchNorm/beta/read
prefix/InceptionResnetV1/Bottleneck/BatchNorm/Reshape/shape
prefix/InceptionResnetV1/Bottleneck/weights
prefix/InceptionResnetV1/Bottleneck/weights/read
prefix/InceptionResnetV1/Logits/Flatten/flatten/Reshape/shape/1
prefix/InceptionResnetV1/Logits/Flatten/flatten/strided_slice/stack_2
prefix/InceptionResnetV1/Logits/Flatten/flatten/strided_slice/stack_1
prefix/InceptionResnetV1/Logits/Flatten/flatten/strided_slice/stack
prefix/InceptionResnetV1/Block8/mul/x
prefix/InceptionResnetV1/

prefix/InceptionResnetV1/Repeat_1/block17_2/Branch_1/Conv2d_0b_1x7/BatchNorm/cond/FusedBatchNorm/Switch_1
prefix/InceptionResnetV1/Repeat_1/block17_2/Branch_1/Conv2d_0b_1x7/BatchNorm/cond/Switch
prefix/InceptionResnetV1/Repeat_1/block17_2/Branch_1/Conv2d_0b_1x7/BatchNorm/cond/switch_t
prefix/InceptionResnetV1/Repeat_1/block17_2/Branch_1/Conv2d_0b_1x7/BatchNorm/cond/Const_1
prefix/InceptionResnetV1/Repeat_1/block17_2/Branch_1/Conv2d_0b_1x7/BatchNorm/cond/Const
prefix/InceptionResnetV1/Repeat_1/block17_2/Branch_1/Conv2d_0a_1x1/BatchNorm/cond_1/pred_id
prefix/InceptionResnetV1/Repeat_1/block17_2/Branch_1/Conv2d_0a_1x1/BatchNorm/cond_1/AssignMovingAvg_1/Switch
prefix/InceptionResnetV1/Repeat_1/block17_2/Branch_1/Conv2d_0a_1x1/BatchNorm/cond_1/AssignMovingAvg_1/sub/Switch
prefix/InceptionResnetV1/Repeat_1/block17_2/Branch_1/Conv2d_0a_1x1/BatchNorm/cond_1/AssignMovingAvg/Switch
prefix/InceptionResnetV1/Repeat_1/block17_2/Branch_1/Conv2d_0a_1x1/BatchNorm/cond_1/AssignMovingAvg/sub/Switch
pref

prefix/InceptionResnetV1/Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm/cond/Merge
prefix/InceptionResnetV1/Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm/cond_1/Switch_1
prefix/InceptionResnetV1/Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm/cond_1/Identity/Switch
prefix/InceptionResnetV1/Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm/cond_1/Identity
prefix/InceptionResnetV1/Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm/cond_1/Merge
prefix/InceptionResnetV1/Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu
prefix/InceptionResnetV1/Mixed_6a/Branch_1/Conv2d_1a_3x3/Conv2D
prefix/InceptionResnetV1/Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm/cond/FusedBatchNorm_1/Switch
prefix/InceptionResnetV1/Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm/cond/FusedBatchNorm_1
prefix/InceptionResnetV1/Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm/cond/FusedBatchNorm/Switch
prefix/InceptionResnetV1/Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm/cond/FusedBatchNorm
prefix/InceptionResnetV1/Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm/cond/Merge_2
prefix/Incepti

layer name : conv1_1
layer name : relu1_1
layer name : conv1_2
layer name : relu1_2
layer name : pool1
layer name : conv2_1
layer name : relu2_1
layer name : conv2_2
layer name : relu2_2
layer name : pool2
layer name : conv3_1
layer name : relu3_1
layer name : conv3_2
layer name : relu3_2
layer name : conv3_3
layer name : relu3_3
layer name : pool3
layer name : conv4_1
layer name : relu4_1
layer name : conv4_2
layer name : relu4_2
layer name : conv4_3
layer name : relu4_3
layer name : pool4
layer name : conv5_1
layer name : relu5_1
layer name : conv5_2
layer name : relu5_2
layer name : conv5_3
layer name : relu5_3
layer name : pool5
layer name : fc6
layer name : relu6
layer name : fc7
layer name : relu7
layer name : fc8
layer name : prob
end model loding
start model learn
end model learn
Iteration 0
sum :  -146225.84
cost:  5893461000000000.0
Iteration 100
sum :  513130.62
cost:  128784594000000.0
Iteration 200
sum :  816087.4
cost:  36817747000000.0
Iteration 300
sum :  872201.6
cost:

content loss Conv2d_2a_3x3

style loss Conv2d_2a_3x3 Conv2d_4a_3x3 mixed_6a mixed_7a

In [None]:
asdf