diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..8638d94
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,14 @@
+root = true
+
+[*]
+charset = utf-8
+end_of_line = lf
+insert_final_newline = true
+indent_style = space
+trim_trailing_whitespace = true
+
+[*.py]
+indent_size = 4
+
+[*.md]
+trim_trailing_whitespace = false
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..dceaf6a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.mat
+env/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..37f8059
--- /dev/null
+++ b/README.md
@@ -0,0 +1,13 @@
+# neural-style
+
+An implementation of [this paper](http://arxiv.org/pdf/1508.06576v2.pdf) in
+TensorFlow.
+
+## Requirements
+
+* TensorFlow
+* SciPy
+* Pillow
+* NumPy
+* [Pre-trained VGG
+  network](http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.mat)
diff --git a/neural_style.py b/neural_style.py
new file mode 100644
index 0000000..da54b8f
--- /dev/null
+++ b/neural_style.py
@@ -0,0 +1,166 @@
+"""Neural style transfer in TensorFlow.
+
+Usage: python neural_style.py CONTENT STYLE WIDTH STYLE_SCALE
+"""
+
+from __future__ import print_function
+
+import vgg
+
+import tensorflow as tf
+import numpy as np
+import scipy.misc as sm
+
+from functools import reduce
+import sys
+import math
+
+VGG_PATH = 'imagenet-vgg-verydeep-19.mat'
+CONTENT_LAYER = 'relu4_2'
+STYLE_LAYERS = ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')
+NOISE_RATIO = 0.0
+ALPHA = 1.0 # weight of content loss
+BETA = 1e4 # weight of style loss
+LEARNING_RATE_INITIAL = 2e1
+LEARNING_DECAY_BASE = 0.94
+LEARNING_DECAY_STEPS = 100
+ITERATIONS = 10000 # number of optimisation steps
+
+def imread(path):
+    """Read the image at `path` as a floating-point array."""
+    # the builtin `float` is what `np.float` aliased; the alias itself no
+    # longer exists in NumPy >= 1.24
+    return sm.imread(path).astype(float)
+
+def imsave(path, img):
+    """Clip `img` to [0, 255], cast it to uint8 and write it to `path`."""
+    img = np.clip(img, 0, 255).astype(np.uint8)
+    sm.imsave(path, img)
+
+def imresize(img, size):
+    """Resize `img`; `size` is passed through to scipy.misc.imresize."""
+    # scipy rescales non-uint8 input to span the full 0..255 range before
+    # resizing, which would change the contrast of the float arrays that
+    # imread() returns; uint8 input is left untouched
+    return sm.imresize(np.clip(img, 0, 255).astype(np.uint8), size)
+
+def main():
+    """Run style transfer, saving a JPEG of the image before every step.
+
+    Reads four command-line arguments, in order:
+      content_path -- image used for the content loss
+      style_path   -- image used for the style loss
+      width        -- output width in pixels; <= 0 keeps the content width
+      style_scale  -- scale factor for the style image; if <= 0, or if the
+                      scaled image is smaller than the output, the style
+                      image is resized to just cover the output instead
+    """
+    args = sys.argv[1:]
+    if len(args) != 4:
+        sys.exit('usage: %s CONTENT STYLE WIDTH STYLE_SCALE' % sys.argv[0])
+    content_path, style_path, width, style_scale = args
+    width = int(width)
+    style_scale = float(style_scale)
+
+    content_image = imread(content_path)
+    style_image = imread(style_path)
+
+    if width <= 0:
+        width = content_image.shape[1]
+
+    content_aspect = (float(content_image.shape[0]) /
+            content_image.shape[1]) # height / width
+    new_shape = (int(math.floor(content_aspect * width)), width)
+    content_image = imresize(content_image, new_shape)
+    style_aspect = (float(style_image.shape[0]) /
+            style_image.shape[1])
+    if style_scale > 0:
+        style_image_scaled = imresize(style_image, style_scale)
+        shape = style_image_scaled.shape
+        if shape[0] >= new_shape[0] and shape[1] >= new_shape[1]:
+            style_image = style_image_scaled
+        else:
+            # too small to cover the output: fit it to the output below
+            style_scale = -1
+    if style_scale <= 0:
+        # resize so that the style image just covers the output shape
+        matched_height = int(math.ceil(new_shape[1] * style_aspect))
+        if matched_height >= new_shape[0]:
+            style_image = imresize(style_image, (matched_height, new_shape[1]))
+        else:
+            matched_width = int(math.ceil(new_shape[0] / style_aspect))
+            style_image = imresize(style_image, (new_shape[0], matched_width))
+    # crop from the top-left corner down to the output shape
+    style_image = style_image[0:new_shape[0], 0:new_shape[1], :]
+    # explicit check instead of `assert`, which is skipped under `python -O`
+    if content_image.shape != style_image.shape:
+        raise ValueError('content shape %s does not match style shape %s' %
+                (content_image.shape, style_image.shape))
+
+    shape = (1,) + content_image.shape
+
+    content_features = {}
+    style_features = {}
+    # first graph: evaluate the target features of both input images
+    g = tf.Graph()
+    with g.as_default():
+        image = tf.placeholder('float', shape=shape)
+        net, mean_pixel = vgg.net(VGG_PATH, image)
+
+        with tf.Session() as sess:
+            content_pre = np.array([vgg.preprocess(content_image, mean_pixel)])
+            content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
+                    feed_dict={image: content_pre})
+
+            style_pre = np.array([vgg.preprocess(style_image, mean_pixel)])
+            for layer in STYLE_LAYERS:
+                style_features[layer] = net[layer].eval(
+                        feed_dict={image: style_pre})
+
+    # second graph: the image is a variable optimised against those targets
+    g = tf.Graph()
+    with g.as_default():
+        global_step = tf.Variable(0, trainable=False)
+        noise = np.random.normal(size=shape, scale=np.std(content_image) * 0.1)
+        content_pre = vgg.preprocess(content_image, mean_pixel)
+        init = content_pre * (1 - NOISE_RATIO) + noise * NOISE_RATIO
+        init = init.astype('float32')
+        image = tf.Variable(init)
+        net, _ = vgg.net(VGG_PATH, image)
+
+        content_loss = tf.nn.l2_loss(
+                net[CONTENT_LAYER] - content_features[CONTENT_LAYER])
+        style_losses = []
+        for name in STYLE_LAYERS:
+            layer = net[name]
+            _, layer_height, layer_width, number = [
+                    dim.value for dim in layer.get_shape()]
+            feats = tf.reshape(layer, (-1, number))
+            gram = tf.matmul(tf.transpose(feats), feats)
+
+            match = style_features[name]
+            match_feats = np.reshape(match, (-1, match.shape[3]))
+            match_gram = np.matmul(match_feats.T, match_feats)
+
+            style_losses.append(tf.nn.l2_loss(gram - match_gram) /
+                    (4.0 * number ** 2 * (layer_height * layer_width) ** 2))
+        style_loss = reduce(tf.add, style_losses) / len(style_losses)
+        loss = ALPHA * content_loss + BETA * style_loss
+
+        learning_rate = tf.train.exponential_decay(LEARNING_RATE_INITIAL,
+                global_step, LEARNING_DECAY_STEPS, LEARNING_DECAY_BASE,
+                staircase=True)
+        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss,
+                global_step=global_step)
+
+        with tf.Session() as sess:
+            sess.run(tf.initialize_all_variables())
+            for i in range(ITERATIONS):
+                print('i = %d' % i)
+                imsave('%05d.jpg' % i, vgg.unprocess(
+                    image.eval().reshape(shape[1:]), mean_pixel))
+                train_step.run()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/vgg.py b/vgg.py
new file mode 100644
index 0000000..dc43055
--- /dev/null
+++ b/vgg.py
@@ -0,0 +1,88 @@
+"""Builds the VGG network layers from a MatConvNet weights file."""
+
+import tensorflow as tf
+import numpy as np
+import scipy.io as sio
+
+def _conv_layer(weights, bias):
+    """Return a function applying a stride-1 SAME convolution plus bias."""
+    def _make_layer(layer_input):
+        conv = tf.nn.conv2d(layer_input, tf.constant(weights),
+                strides=[1, 1, 1, 1], padding='SAME')
+        return tf.nn.bias_add(conv, bias)
+    return _make_layer
+
+def _pool_layer():
+    """Return a function applying 2x2 max pooling with stride 2."""
+    def _make_layer(layer_input):
+        return tf.nn.max_pool(layer_input, ksize=[1, 2, 2, 1],
+                strides=[1, 2, 2, 1], padding='SAME')
+    return _make_layer
+
+def _add_layer(input_image, layers, func):
+    """Append to `layers` the result of `func` applied to the last layer.
+
+    The first layer is computed from `input_image` instead.
+    """
+    source = layers[-1] if layers else input_image
+    layers.append(func(source))
+
+def preprocess(image, mean_pixel):
+    """Subtract `mean_pixel` from `image`."""
+    return image - mean_pixel
+
+def unprocess(image, mean_pixel):
+    """Undo preprocess() by adding `mean_pixel` back to `image`."""
+    return image + mean_pixel
+
+
+def net(data_path, input_image):
+    """Build the network on top of `input_image`.
+
+    Loads the weights and normalization data from the .mat file at
+    `data_path` and returns `(layers, mean_pixel)`: a dict mapping each
+    layer name to its output, and the mean of the file's normalization
+    array over its first two axes.
+    """
+    layers = [
+        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
+
+        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
+
+        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
+        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
+
+        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
+        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
+
+        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
+        'relu5_3', 'conv5_4', 'relu5_4'
+    ]
+
+
+    data = sio.loadmat(data_path)
+    mean = data['normalization'][0][0][0]
+    mean_pixel = np.mean(mean, axis=(0, 1))
+    constants = data['layers'][0]
+
+    outputs = []
+    for i, kind in enumerate(layers):
+        short = kind[:4]
+        if short == 'conv':
+            weights = constants[i][0][0][0][0][0]
+            # in matconvnet, weights are [width, height, depth, num_filters]
+            # but in tensorflow, [height, width, in_channels, out_channels]
+            weights = np.transpose(weights, (1, 0, 2, 3))
+            bias = constants[i][0][0][0][0][1].reshape(-1)
+            new = _conv_layer(weights, bias)
+        elif short == 'relu':
+            new = tf.nn.relu
+        elif short == 'pool':
+            new = _pool_layer()
+        else:
+            raise ValueError('invalid layer type: %s' % kind)
+        _add_layer(input_image, outputs, new)
+
+    assert len(layers) == len(outputs)
+
+    return dict(zip(layers, outputs)), mean_pixel