In [21]:
import numpy as np
import os
import math
import tensorflow as tf
import PIL as Image
import time

# Per-channel means subtracted from the input image in VGGNet.build().
# The order is [blue, green, red]: build() subtracts index 0 from the blue
# channel, index 1 from green and index 2 from red.
VGG_MEAN = [103.939, 116.779, 123.68]

class VGGNet:
    """Builds the convolutional part of a VGG16 graph from pre-trained parameters.

    All parameters are inserted into the graph as ``tf.constant`` ops, so the
    network itself contributes no trainable variables.
    """

    def __init__(self, data_dict):
        """Store the pre-trained parameters.

        Args:
            data_dict: mapping indexed by layer name. For each layer, element
                ``[0]`` is read as the weights and element ``[1]`` as the bias.
        """
        self.data_dict = data_dict

    def get_conv_fiter(self, name):
        """Return the convolution kernel of layer ``name`` as a constant."""
        return tf.constant(self.data_dict[name][0], name='conv')

    def get_fc_weight(self, name):
        """Return the fully-connected weight matrix of layer ``name`` as a constant."""
        return tf.constant(self.data_dict[name][0], name='fc')

    def get_bias(self, name):
        """Return the bias vector of layer ``name`` as a constant."""
        return tf.constant(self.data_dict[name][1], name='bias')

    def conv_layer(self, x, name):
        """Build a convolution layer: conv2d (stride 1, SAME) + bias + ReLU.

        Args:
            x: input tensor.
            name: key of the layer in ``data_dict``; also used as the name scope.

        Returns:
            The activated output tensor.
        """
        with tf.name_scope(name):
            conv_w = self.get_conv_fiter(name)
            conv_b = self.get_bias(name)
            h = tf.nn.conv2d(x, conv_w, [1, 1, 1, 1], padding='SAME')
            h = tf.nn.bias_add(h, conv_b)
            return tf.nn.relu(h)

    def pooling_layer(self, x, name):
        """Build a 2x2 max-pooling layer with stride 2 and SAME padding."""
        return tf.nn.max_pool(x,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME',
                              name=name)

    def fc_layer(self, x, name, activation=tf.nn.relu):
        """Build a fully-connected layer.

        Args:
            x: 2-D input tensor.
            name: key of the layer in ``data_dict``; also used as the name scope.
            activation: callable applied to the affine output, or ``None`` to
                return the raw output.

        Returns:
            The (optionally activated) output tensor.
        """
        with tf.name_scope(name):
            fc_w = self.get_fc_weight(name)
            fc_b = self.get_bias(name)
            h = tf.matmul(x, fc_w)
            h = tf.nn.bias_add(h, fc_b)
            # Identity test: `== None` could be intercepted by an overloaded __eq__.
            if activation is None:
                return h
            return activation(h)

    def flatten_layer(self, x, name):
        """Flatten every dimension after the first into a single one.

        The static shape of ``x`` is used, so all non-batch dimensions must be
        known when the graph is built.
        """
        with tf.name_scope(name):
            x_shape = x.get_shape().as_list()
            dim = 1
            for d in x_shape[1:]:
                dim *= d
            return tf.reshape(x, [-1, dim])

    def build(self, x_rgb):
        """Build the VGG16 convolutional stack and expose each layer as an attribute.

        Args:
            x_rgb: RGB image tensor whose non-batch dimensions are
                [224, 224, 3] (checked by the assertion below).
        """
        start_time = time.time()
        print('building model....')

        # Split the channel axis (axis 3) into separate r, g, b tensors.
        r, g, b = tf.split(x_rgb, [1, 1, 1], axis=3)
        # Re-assemble in BGR order while subtracting the per-channel mean.
        x_bgr = tf.concat(
            [b - VGG_MEAN[0],
             g - VGG_MEAN[1],
             r - VGG_MEAN[2]],
            axis=3)

        assert x_bgr.get_shape().as_list()[1:] == [224, 224, 3]

        # Layer names must match the keys used by the pre-trained VGG16 model.
        self.conv1_1 = self.conv_layer(x_bgr, b'conv1_1')
        self.conv1_2 = self.conv_layer(self.conv1_1, b'conv1_2')
        self.pool1 = self.pooling_layer(self.conv1_2, 'pool1')

        self.conv2_1 = self.conv_layer(self.pool1, b'conv2_1')
        self.conv2_2 = self.conv_layer(self.conv2_1, b'conv2_2')
        self.pool2 = self.pooling_layer(self.conv2_2, 'pool2')

        self.conv3_1 = self.conv_layer(self.pool2, b'conv3_1')
        self.conv3_2 = self.conv_layer(self.conv3_1, b'conv3_2')
        self.conv3_3 = self.conv_layer(self.conv3_2, b'conv3_3')
        self.pool3 = self.pooling_layer(self.conv3_3, 'pool3')

        self.conv4_1 = self.conv_layer(self.pool3, b'conv4_1')
        self.conv4_2 = self.conv_layer(self.conv4_1, b'conv4_2')
        self.conv4_3 = self.conv_layer(self.conv4_2, b'conv4_3')
        self.pool4 = self.pooling_layer(self.conv4_3, 'pool4')

        self.conv5_1 = self.conv_layer(self.pool4, b'conv5_1')
        self.conv5_2 = self.conv_layer(self.conv5_1, b'conv5_2')
        self.conv5_3 = self.conv_layer(self.conv5_2, b'conv5_3')
        self.pool5 = self.pooling_layer(self.conv5_3, 'pool5')

        # Temporarily disabled: building the fully-connected layers is the
        # most time-consuming part of graph construction.
#         self.flatten5 = self.flatten_layer(self.pool5, 'flatten')
#         self.fc6 = self.fc_layer(self.flatten5, b'fc6')
#         self.fc7 = self.fc_layer(self.fc6, b'fc7')
#         self.fc8 = self.fc_layer(self.fc7, b'fc8', activation=None)
#         self.prob = tf.nn.softmax(self.fc8, name='prob')

        print('building modle finished: %4ds' % (time.time() - start_time))
        

In [22]:
# Test the computation graph
# vgg16_npy_path = './vgg16.npy'
# data_dict = np.load(file=vgg16_npy_path, encoding='bytes').item()

# vgg16_for_result = VGGNet(data_dict)
# content = tf.placeholder(tf.float32, shape=[1,224,224,3])
# vgg16_for_result.build(content)


building model....
building modle finished:    0s


In [None]:

# Locations of the pre-trained VGG16 parameters and of the two input images.
vgg16_npy_path = './vgg16.npy'
content_img_path = './source_images/gugong.jpg'
style_img_path = './source_images/xingkong.jpeg'

# learning_rate is handed to the Adam optimizer when the train op is built.
# NOTE(review): num_steps is not used in the visible cells; presumably it is
# the number of optimisation iterations. Confirm against the training loop.
num_steps = 100
learning_rate = 10

# Weights of the content and style terms in the total loss.
lambda_c = 0.1
lambda_s = 500

output_dir = './run_sytle_transfer'

# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it has no
# check-then-create race and also creates missing parent directories.
os.makedirs(output_dir, exist_ok=True)

In [None]:
def initial_result(shape, mean, stddev):
    """Create a variable initialised from a truncated normal distribution.

    Args:
        shape: shape of the variable to create.
        mean: mean of the truncated normal distribution.
        stddev: standard deviation of the truncated normal distribution.

    Returns:
        A ``tf.Variable`` holding the sampled initial value.
    """
    return tf.Variable(tf.truncated_normal(shape, mean=mean, stddev=stddev))

def read_img(img_name):
    """Load an image file into a batched integer array.

    Args:
        img_name: path of the image file to read.

    Returns:
        An ``np.int32`` array of shape ``(1, height, width, 3)``.
    """
    # The notebook's top-level `import PIL as Image` binds the PIL *package*,
    # which has no `open` attribute. Import the Image submodule locally so the
    # call below resolves correctly.
    from PIL import Image

    # The context manager closes the underlying file. convert('RGB') guarantees
    # three channels even for grayscale, palette or RGBA files.
    with Image.open(img_name) as img:
        np_img = np.array(img.convert('RGB'))

    # NOTE(review): no resizing is done here; the placeholders this notebook
    # feeds are declared as [1, 224, 224, 3], so the files must already be
    # 224x224.
    return np.asarray([np_img], dtype=np.int32)

def gram_matrix(x):
    """Compute the normalised Gram matrix of a feature map.

    Args:
        x: rank-4 feature tensor extracted from the VGG net. Its static shape
            must be fully defined because it is read at graph-build time.

    Returns:
        A tensor of shape [batch, channel, channel]: the product of the
        flattened features with their own adjoint, divided by the product of
        the three non-batch dimensions.
    """
    batch, dim_a, dim_b, channels = x.get_shape().as_list()
    # Collapse the two spatial dimensions so each row is one spatial position.
    flat = tf.reshape(x, [batch, dim_b * dim_a, channels])

    product = tf.matmul(flat, flat, adjoint_a=True)
    return product / tf.constant(channels * dim_a * dim_b, tf.float32)

# The only variable in the graph: the image that the optimizer updates.
result = initial_result((1,224,224,3), 127.5, 20)

content_val = read_img(content_img_path)
style_val = read_img(style_img_path)

content = tf.placeholder(tf.float32, shape=[1,224,224,3])
style = tf.placeholder(tf.float32, shape=[1,224,224,3])

# The .npy file stores a pickled dict. NumPy >= 1.16.3 refuses to unpickle
# unless allow_pickle=True is passed explicitly.
# NOTE(review): unpickling can execute arbitrary code, so only load a
# vgg16.npy obtained from a trusted source.
data_dict = np.load(file=vgg16_npy_path, encoding='bytes', allow_pickle=True).item()

# One network per input so each exposes its own layer tensors. They all share
# the same pre-trained parameters.
vgg_for_content = VGGNet(data_dict)
vgg_for_style = VGGNet(data_dict)
vgg_for_result = VGGNet(data_dict)

vgg_for_content.build(content)
vgg_for_style.build(style)
vgg_for_result.build(result)


# Layers compared in the content and style terms. Each list of names drives
# BOTH the reference network and the result network, so the paired feature
# lists can never fall out of sync (zip() would silently truncate a mismatch).
# Candidate layers: conv1_2, conv2_2, conv3_3, conv4_3, conv5_3.
content_layer_names = ['conv1_2']
style_layer_names = ['conv4_3']

content_features = [getattr(vgg_for_content, layer) for layer in content_layer_names]
result_content_features = [getattr(vgg_for_result, layer) for layer in content_layer_names]

style_features = [getattr(vgg_for_style, layer) for layer in style_layer_names]
style_gram = [gram_matrix(feature) for feature in style_features]

result_style_features = [getattr(vgg_for_result, layer) for layer in style_layer_names]
result_style_gram = [gram_matrix(feature) for feature in result_style_features]

# Content loss: mean squared difference between the content image's features
# and the result image's features, accumulated over the selected layers.
content_loss = tf.zeros(1, tf.float32)
# zip pairs the lists element-wise: zip([1,2],[3,4]) -> [(1,3),(2,4)]
for c, c_ in zip(content_features, result_content_features):
    # Feature maps are rank 4; axes [1,2,3] average over everything except
    # the batch dimension.
    content_loss += tf.reduce_mean((c - c_) ** 2, [1,2,3])

# Style loss: mean squared difference between the Gram matrices.
style_loss = tf.zeros(1, tf.float32)
for s, s_ in zip(style_gram, result_style_gram):
    # gram_matrix() returns a rank-3 tensor [batch, channel, channel], so the
    # non-batch axes are [1,2]. `tf.mean` does not exist; the reduction op is
    # tf.reduce_mean.
    style_loss += tf.reduce_mean((s - s_) ** 2, [1,2])

loss = content_loss * lambda_c + style_loss * lambda_s

with tf.name_scope('train_op'):
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    