In [17]:
import os
import math
import numpy as np
import tensorflow.compat.v1 as tf
from PIL import Image
import time

VGG_MEAN = [103.939, 116.779, 123.68]

tf.disable_v2_behavior()

class VGGNet:
    """
    VGG-16 convolutional stack assembled from pre-trained parameters.

    ``data_dict`` is indexed by layer name; for every layer, element ``[0]``
    is used as the weights and element ``[1]`` as the bias. All parameters
    are embedded in the graph as constants.
    """

    def __init__(self, data_dict):
        self.data_dict = data_dict

    def get_conv_filters(self, name):
        """Return the convolution kernel of layer ``name`` as a constant."""
        return tf.constant(self.data_dict[name][0], name='conv')

    def get_fc_weight(self, name):
        """Return the fully connected weight matrix of layer ``name`` as a constant."""
        return tf.constant(self.data_dict[name][0], name='fc')

    def get_bias(self, name):
        """Return the bias vector of layer ``name`` as a constant."""
        return tf.constant(self.data_dict[name][1], name='bias')

    def conv_layer(self, X, name):
        """Convolution (stride 1, SAME padding) + bias + ReLU under scope ``name``."""
        with tf.name_scope(name):
            kernel = self.get_conv_filters(name)
            bias = self.get_bias(name)
            # [1, 1, 1, 1] is the stride along each input dimension.
            convolved = tf.nn.conv2d(X, kernel, [1, 1, 1, 1], padding='SAME')
            return tf.nn.relu(tf.nn.bias_add(convolved, bias))

    def pooling_layer(self, X, name):
        """2x2 max pooling with stride 2 and SAME padding."""
        window = [1, 2, 2, 1]
        return tf.nn.max_pool(X,
                              ksize=window,
                              strides=window,
                              padding='SAME',
                              name=name)

    def fc_layer(self, X, name, activation = tf.nn.relu):
        """
        Fully connected layer under scope ``name``.

        :param X: 2-D input, multiplied by the layer's weight matrix.
        :param name: layer key in ``data_dict``.
        :param activation: callable applied to the affine output;
                           ``None`` returns the affine output unchanged.
        """
        with tf.name_scope(name):
            weight = self.get_fc_weight(name)
            bias = self.get_bias(name)
            affine = tf.nn.bias_add(tf.matmul(X, weight), bias)
            return affine if activation is None else activation(affine)

    def flatten_layer(self, X, name):
        """Collapse every dimension after the first into one axis."""
        with tf.name_scope(name):
            # Product of the static sizes of all non-batch dimensions.
            flat_size = 1
            for size in X.get_shape().as_list()[1:]:
                flat_size *= size
            return tf.reshape(X, [-1, flat_size])

    def build(self, X_rgb):
        """
        Build the VGG-16 convolution/pooling stack on top of ``X_rgb``.

        Each intermediate activation is stored on the instance under its
        layer name (``conv1_1`` ... ``conv5_3``, ``pool1`` ... ``pool5``).

        :param X_rgb: RGB image tensor whose non-batch shape is [224, 224, 3].
        :return: None
        """
        start_time = time.time()
        print('building model...')

        # Reorder channels RGB -> BGR and subtract the per-channel mean;
        # VGG_MEAN is indexed in B, G, R order.
        red, green, blue = tf.split(X_rgb, [1, 1, 1], axis=3)
        centered = [blue - VGG_MEAN[0],
                    green - VGG_MEAN[1],
                    red - VGG_MEAN[2]]
        X_bgr = tf.concat(centered, axis=3)
        assert X_bgr.get_shape().as_list()[1:] == [224, 224, 3]

        # One tuple per block, applied strictly in this order.
        stack = (
            ('conv1_1', 'conv1_2', 'pool1'),
            ('conv2_1', 'conv2_2', 'pool2'),
            ('conv3_1', 'conv3_2', 'conv3_3', 'pool3'),
            ('conv4_1', 'conv4_2', 'conv4_3', 'pool4'),
            ('conv5_1', 'conv5_2', 'conv5_3', 'pool5'),
        )
        activation = X_bgr
        for block in stack:
            for layer_name in block:
                if layer_name.startswith('pool'):
                    activation = self.pooling_layer(activation, layer_name)
                else:
                    activation = self.conv_layer(activation, layer_name)
                setattr(self, layer_name, activation)

        # The classifier head is intentionally disabled; to enable it:
        # self.flatten5 = self.flatten_layer(self.pool5, 'flatten')
        # self.fc6 = self.fc_layer(self.flatten5, 'fc6')
        # self.fc7 = self.fc_layer(self.fc6, 'fc7')
        # self.fc8 = self.fc_layer(self.fc7, 'fc8',activation = None)
        # self.prob = tf.nn.softmax(self.fc8, name = 'prob')

        print('building model finished: %4ds' % (time.time() - start_time))

In [24]:
# Locations of the pre-trained VGG-16 parameters and the two input images.
vgg16_npy_path = './vgg16.npy'
content_img_path = './gugong.jpg'
style_img_path = './xingkong.jpeg'

# Number of optimisation steps and the Adam learning rate.
num_steps = 100
learning_rate = 10

# Weights of the content and style terms in the total loss.
lambda_c = 0.1
lambda_s = 500

# Directory that receives one result image per optimisation step.
output_dir = './run_style_transfer'

# exist_ok=True removes the check-then-create race of exists()+mkdir(),
# also creates missing parent directories, and keeps the cell re-runnable.
os.makedirs(output_dir, exist_ok=True)

In [22]:
def initial_result(shape, mean, stddev, seed=None):
    """
    Create the trainable variable that holds the generated image.

    :param shape: shape of the variable.
    :param mean: mean of the truncated normal used for initialisation.
    :param stddev: standard deviation of that distribution.
    :param seed: optional integer seed for the initialiser; the default
                 ``None`` keeps the previous unseeded behaviour, pass a
                 value to make runs reproducible.
    :return: a ``tf.Variable`` initialised from the truncated normal.
    """
    initial = tf.truncated_normal(shape, mean=mean, stddev=stddev, seed=seed)
    return tf.Variable(initial)

def read_img(img_name):
    """
    Load an image file as a batch containing a single RGB image.

    The image is forced to 3-channel RGB so grayscale, palette, RGBA or
    CMYK files still produce three channels. No resizing is done here:
    the file must already match the spatial size of the placeholder it
    is fed into.

    :param img_name: path of the image file.
    :return: int32 array of shape (1, height, width, 3).
    """
    # The context manager releases the file handle as soon as the pixels
    # have been copied out.
    with Image.open(img_name) as img:
        np_img = np.array(img.convert('RGB'))  # (height, width, 3)
    return np.asarray([np_img], dtype=np.int32)  # (1, height, width, 3)

def gram_matrix(X):
    """
    Compute the normalised Gram matrix of a feature map, per batch item.

    :param X: 4-D feature tensor with statically known spatial and channel
              sizes; the batch size may be unknown.
              NOTE(review): the conv layers use TensorFlow's default
              layout, so the axes are presumably
              [batch, height, width, channel]; only the product of the
              two middle axes matters here.
    :return: tensor of shape [batch, channel, channel], where entry (i, j)
             is the inner product of channels i and j over all spatial
             positions, divided by channel * height * width.
    """
    _, dim1, dim2, ch = X.get_shape().as_list()
    positions = dim1 * dim2
    # -1 lets the reshape work when the static batch size is None as well.
    features = tf.reshape(X, [-1, positions, ch])
    # [ch, positions] x [positions, ch] -> [ch, ch] for every batch item.
    gram = tf.matmul(features, features, adjoint_a=True) \
        / tf.constant(ch * positions, tf.float32)
    return gram

# The image being optimised: noise centred on mid-grey (mean 127.5, stddev 20).
result = initial_result((1, 224, 224, 3), 127.5, 20)

content_value = read_img(content_img_path)
style_value = read_img(style_img_path)

# The content and style images are fed at run time through these placeholders.
content = tf.placeholder(tf.float32, shape=[1, 224, 224, 3])
style = tf.placeholder(tf.float32, shape=[1, 224, 224, 3])

# NOTE(review): allow_pickle=True unpickles arbitrary objects; only load a
# parameter file that comes from a trusted source.
data_dict = np.load(vgg16_npy_path, allow_pickle=True, encoding='latin1').item()

# Three networks with the same pre-trained parameters, one per input.
vgg_for_content = VGGNet(data_dict)
vgg_for_style = VGGNet(data_dict)
vgg_for_result = VGGNet(data_dict)

vgg_for_content.build(content)
vgg_for_style.build(style)
vgg_for_result.build(result)

# Content is compared on conv1_2. Further layers (conv2_2, conv3_3, conv4_3,
# conv5_3) can be appended, as long as both lists stay aligned.
content_features = [vgg_for_content.conv1_2]
result_content_features = [vgg_for_result.conv1_2]

# Style is compared on conv4_3 through Gram matrices; the same rule about
# keeping the two lists aligned applies.
style_features = [vgg_for_style.conv4_3]
style_gram = [gram_matrix(feature) for feature in style_features]

result_style_features = [vgg_for_result.conv4_3]
result_style_gram = [gram_matrix(feature) for feature in result_style_features]

# Content loss: mean squared difference of the activations, summed over the
# selected layers. The reduction keeps the batch axis, so the loss has shape [1].
content_loss = tf.zeros(1, tf.float32)
for target_act, result_act in zip(content_features, result_content_features):
    content_loss += tf.reduce_mean((target_act - result_act) ** 2, [1, 2, 3])

# Style loss: mean squared difference of the Gram matrices, summed over the
# selected layers; also shape [1].
style_loss = tf.zeros(1, tf.float32)
for target_gram, result_gram in zip(style_gram, result_style_gram):
    style_loss += tf.reduce_mean((target_gram - result_gram) ** 2, [1, 2])

# Weighted total; `result` is the only variable created in this cell.
loss = content_loss * lambda_c + style_loss * lambda_s
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

building model...
building model finished:    0s
building model...
building model finished:    0s
building model...
building model finished:    0s


In [23]:
init_op = tf.global_variables_initializer()

# The same two images are fed on every optimisation step.
feed = {content: content_value, style: style_value}

with tf.Session() as sess:
    sess.run(init_op)
    for step in range(num_steps):
        # One optimiser step; the three losses are fetched for logging only.
        loss_value, content_loss_value, style_loss_value, _ = sess.run(
            [loss, content_loss, style_loss, train_op],
            feed_dict=feed)
        print('step: %d, loss_value: %8.4f, content_loss: %8.4f, style_loss: %8.4f'
              % (step + 1,
                 loss_value[0],
                 content_loss_value[0],
                 style_loss_value[0]))

        # Read the image back after the update, clamp it to the valid pixel
        # range and write it out as an 8-bit JPEG.
        result_img_path = os.path.join(
            output_dir, 'result-%05d.jpg' % (step + 1))
        result_val = np.clip(sess.run(result)[0], 0, 255)
        img_arr = result_val.astype(np.uint8)
        img = Image.fromarray(img_arr)
        img.save(result_img_path)

step: 1, loss_value: 14143.6895, content_loss: 60400.4961, style_loss:  16.2073
step: 2, loss_value: 11810.3896, content_loss: 45856.9297, style_loss:  14.4494
step: 3, loss_value: 9081.0078, content_loss: 37219.8984, style_loss:  10.7180
step: 4, loss_value: 7218.0146, content_loss: 32550.7109, style_loss:   7.9259
step: 5, loss_value: 6784.0840, content_loss: 29948.1270, style_loss:   7.5785
step: 6, loss_value: 6321.9780, content_loss: 28788.4941, style_loss:   6.8863
step: 7, loss_value: 5476.2290, content_loss: 28222.9180, style_loss:   5.3079
step: 8, loss_value: 5110.4473, content_loss: 28100.3164, style_loss:   4.6008
step: 9, loss_value: 4787.3984, content_loss: 28063.9922, style_loss:   3.9620
step: 10, loss_value: 4633.9863, content_loss: 28036.0254, style_loss:   3.6608
step: 11, loss_value: 4410.9878, content_loss: 27950.5781, style_loss:   3.2319
step: 12, loss_value: 4257.6484, content_loss: 27759.0684, style_loss:   2.9635
step: 13, loss_value: 4133.4409, content_loss: 