Initial commit
anishathalye committed Nov 22, 2015
commit 4f1ea8e (0 parents)
Showing 5 changed files with 228 additions and 0 deletions.
14 changes: 14 additions & 0 deletions .editorconfig
@@ -0,0 +1,14 @@
root = true

[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
indent_style = space
trim_trailing_whitespace = true

[*.py]
indent_size = 4

[*.md]
trim_trailing_whitespace = false
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
*.mat
env/
13 changes: 13 additions & 0 deletions README.md
@@ -0,0 +1,13 @@
# neural-style

An implementation of [A Neural Algorithm of Artistic
Style](http://arxiv.org/pdf/1508.06576v2.pdf) (Gatys et al.) in TensorFlow.

## Requirements

* TensorFlow
* SciPy
* Pillow
* NumPy
* [Pre-trained VGG
network](http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.mat)
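
## Running

A minimal invocation sketch, based on the positional arguments that
`neural_style.py` reads from `sys.argv` (content image, style image, output
width, style scale); the image file names here are placeholders:

    python neural_style.py content.jpg style.jpg 512 1.0

A width of 0 or less keeps the content image's original width, and a style
scale of 0 or less resizes the style image to cover the content dimensions
before cropping.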
125 changes: 125 additions & 0 deletions neural_style.py
@@ -0,0 +1,125 @@
import vgg

import tensorflow as tf
import numpy as np
import scipy.misc as sm

import sys
import math

VGG_PATH = 'imagenet-vgg-verydeep-19.mat'
CONTENT_LAYER = 'relu4_2'
STYLE_LAYERS = ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')
NOISE_RATIO = 0.0
ALPHA = 1.0 # weight of content loss
BETA = 1e4 # weight of style loss
LEARNING_RATE_INITIAL = 2e1
LEARNING_DECAY_BASE = 0.94
LEARNING_DECAY_STEPS = 100
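# With NOISE_RATIO = 0.0, the optimized image is initialized to the content
# image itself; a larger ratio blends in Gaussian noise (see init in main()).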

def imread(path):
    return sm.imread(path).astype(np.float)

def imsave(path, img):
    img = np.clip(img, 0, 255).astype(np.uint8)
    sm.imsave(path, img)

def main():
    content_path, style_path, width, style_scale = sys.argv[1:]
    width = int(width)
    style_scale = float(style_scale)

    content_image = imread(content_path)
    style_image = imread(style_path)

    if width <= 0:
        width = content_image.shape[1]

    content_aspect = (float(content_image.shape[0]) /
            content_image.shape[1])  # height / width
    new_shape = (int(math.floor(float(content_image.shape[0]) /
            content_image.shape[1] * width)), width)
    content_image = sm.imresize(content_image, new_shape)
    style_aspect = (float(style_image.shape[0]) /
            style_image.shape[1])
    if style_scale > 0:
        style_image_scaled = sm.imresize(style_image, style_scale)
        shape = style_image_scaled.shape
        if shape[0] >= new_shape[0] and shape[1] >= new_shape[1]:
            style_image = style_image_scaled
        else:
            style_scale = -1
    if style_scale <= 0:
        matched_height = int(math.ceil(new_shape[1] * style_aspect))
        if matched_height >= new_shape[0]:
            style_image = sm.imresize(style_image, (matched_height, new_shape[1]))
        else:
            matched_width = int(math.ceil(new_shape[0] / style_aspect))
            style_image = sm.imresize(style_image, (new_shape[0], matched_width))
    style_image = style_image[0:new_shape[0], 0:new_shape[1], :]
    assert content_image.shape == style_image.shape
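    # content_image and style_image now share the same (height, width, 3)
    # shape, so a single placeholder shape works for both feature passes below.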

    shape = (1,) + content_image.shape

    content_features = {}
    style_features = {}
    g = tf.Graph()
    with g.as_default():
        image = tf.placeholder('float', shape=shape)
        net, mean_pixel = vgg.net(VGG_PATH, image)

        with tf.Session() as sess:
            content_pre = np.array([vgg.preprocess(content_image, mean_pixel)])
            content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
                    feed_dict={image: content_pre})

            style_pre = np.array([vgg.preprocess(style_image, mean_pixel)])
            for layer in STYLE_LAYERS:
                style_features[layer] = net[layer].eval(
                        feed_dict={image: style_pre})

    g = tf.Graph()
    with g.as_default():
        global_step = tf.Variable(0, trainable=False)
        noise = np.random.normal(size=shape, scale=np.std(content_image) * 0.1)
        content_pre = vgg.preprocess(content_image, mean_pixel)
        init = content_pre * (1 - NOISE_RATIO) + noise * NOISE_RATIO
        init = init.astype('float32')
        image = tf.Variable(init)
        net, _ = vgg.net(VGG_PATH, image)

        content_loss = tf.nn.l2_loss(
                net[CONTENT_LAYER] - content_features[CONTENT_LAYER])
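        # Style loss per layer, following Gatys et al.: reshape the layer
        # response to a matrix F of shape (height * width, number), form the
        # Gram matrix G = F^T F, and penalize the squared difference from the
        # style image's Gram matrix, normalized by
        # 4 * number^2 * (height * width)^2.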
        style_losses = []
        for i in STYLE_LAYERS:
            layer = net[i]
            _, height, width, number = map(lambda i: i.value, layer.get_shape())
            feats = tf.reshape(layer, (-1, number))
            gram = tf.matmul(tf.transpose(feats), feats)

            match = style_features[i]
            match_feats = np.reshape(match, (-1, match.shape[3]))
            match_gram = np.matmul(match_feats.T, match_feats)

            style_losses.append(tf.nn.l2_loss(gram - match_gram) /
                    (4.0 * number ** 2 * (height * width) ** 2))
        style_loss = reduce(tf.add, style_losses) / len(style_losses)
        loss = ALPHA * content_loss + BETA * style_loss

        learning_rate = tf.train.exponential_decay(LEARNING_RATE_INITIAL,
                global_step, LEARNING_DECAY_STEPS, LEARNING_DECAY_BASE,
                staircase=True)
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss,
                global_step=global_step)
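        # Note that the optimizer updates the image variable itself (the
        # generated picture); the VGG weights are tf.constant tensors inside
        # vgg.net, so they are never trained.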

        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            for i in range(10000):
                print 'i = %d' % i
                imsave('%05d.jpg' % i, vgg.unprocess(
                        image.eval().reshape(shape[1:]), mean_pixel))
                train_step.run()


if __name__ == '__main__':
    main()
74 changes: 74 additions & 0 deletions vgg.py
@@ -0,0 +1,74 @@
import tensorflow as tf
import numpy as np
import scipy.io as sio

def _conv_layer(weights, bias):
    def _make_layer(input):
        conv = tf.nn.conv2d(input, tf.constant(weights), strides=[1, 1, 1, 1],
                padding='SAME')
        return tf.nn.bias_add(conv, bias)
    return _make_layer

def _pool_layer():
    def _make_layer(input):
        return tf.nn.max_pool(input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    return _make_layer

def _add_layer(input_image, layers, func):
    if not layers:
        new = func(input_image)
    else:
        new = func(layers[-1])
    layers.append(new)

def preprocess(image, mean_pixel):
    return image - mean_pixel

def unprocess(image, mean_pixel):
    image = image + mean_pixel
    return image


def net(data_path, input_image):
    layers = [
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',

        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',

        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',

        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',

        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4'
    ]


    data = sio.loadmat(data_path)
    mean = data['normalization'][0][0][0]
    mean_pixel = np.mean(mean, axis=(0, 1))
    constants = data['layers'][0]
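    # The nested indexing mirrors how scipy.io.loadmat exposes MatConvNet's
    # MATLAB structs: data['normalization'][0][0][0] is the average image
    # (reduced above to a per-channel mean pixel), and for each conv layer
    # constants[i][0][0][0][0] holds the (kernel, bias) pair unpacked below.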

    net = []
    for i, kind in enumerate(layers):
        short = kind[:4]
        if short == 'conv':
            weights = constants[i][0][0][0][0][0]
            # in matconvnet, weights are [width, height, depth, num_filters]
            # but in tensorflow, [height, width, in_channels, out_channels]
            weights = np.transpose(weights, (1, 0, 2, 3))
            bias = constants[i][0][0][0][0][1].reshape(-1)
            new = _conv_layer(weights, bias)
        elif short == 'relu':
            new = tf.nn.relu
        elif short == 'pool':
            new = _pool_layer()
        else:
            raise ValueError('invalid layer type: %s' % kind)
        _add_layer(input_image, net, new)

    assert len(layers) == len(net)

    return dict(zip(layers, net)), mean_pixel
