In [None]:
%matplotlib inline 
!pip3 install d2lzh
!pip3 install mxnet
import d2lzh as d2l
from mxnet import autograd, gluon, image, init, nd, gpu
from mxnet.gluon.data.vision import datasets, transforms
from mxnet.gluon import model_zoo, nn
import time
import matplotlib.pyplot as plt

In [None]:
# Apply d2l's figure-size setting, then load the content image from disk
# and preview it.
d2l.set_figsize()
content_img = image.imread('input_1.jpg')
# The trailing ';' stops the notebook from echoing imshow's return value.
d2l.plt.imshow(content_img.asnumpy());

In [None]:
# Load the style image from disk and preview it.
style_img = image.imread('3.jpg')
# The trailing ';' stops the notebook from echoing imshow's return value.
d2l.plt.imshow(style_img.asnumpy());

In [None]:
# Per-channel RGB mean and standard deviation used by preprocess/postprocess
# to normalize and de-normalize images (the original note attributes these
# values to ImageNet statistics).
rgb_mean = nd.array([0.485, 0.456, 0.406])
rgb_std = nd.array([0.229, 0.224, 0.225])
def preprocess(img, image_shape):
  """Turn a loaded image into a normalized batch of size one.

  Args:
    img: image array accepted by ``image.imresize``.
    image_shape: two values unpacked as the trailing positional arguments
      of ``image.imresize`` (presumably (width, height) -- confirm against
      the MXNet documentation).

  Returns:
    The resized image as float32, divided by 255, standardized with
    ``rgb_mean`` / ``rgb_std``, with its last axis moved to the front and a
    new leading axis of length 1 added.
  """
  resized = image.imresize(img, *image_shape)
  scaled = resized.astype('float32') / 255
  normalized = (scaled - rgb_mean) / rgb_std
  # Move the last axis first (assumes the input is height x width x channel).
  channel_first = normalized.transpose((2, 0, 1))
  return channel_first.expand_dims(axis=0)
def postprocess(img):
  """Undo ``preprocess`` so the result can be displayed.

  Takes the first element of ``img`` along axis 0, moves it to the context
  that holds ``rgb_std``, reorders its axes with (1, 2, 0), reverses the
  mean/std normalization and clips the values to the range [0, 1].
  """
  first = img[0].as_in_context(rgb_std.context)
  channel_last = first.transpose((1, 2, 0))
  restored = channel_last * rgb_std + rgb_mean
  return restored.clip(0, 1)


In [None]:
from mxnet.gluon.model_zoo import vision as models
# Load VGG-19 with pretrained weights through the `models` alias, which is
# the same module object as `model_zoo.vision`.
pretrained_net = models.vgg19(pretrained=True)
# Bare expression: the notebook echoes the network so its layers can be inspected.
pretrained_net

In [None]:
# Indices into pretrained_net.features whose outputs are kept: the style
# layers feed the style loss, the content layers feed the content loss.
# Only the outputs of these layers are needed.
style_layers = [0, 1, 2, 3, 5, 6, 10, 11, 19, 20, 21, 22, 23, 24, 25]
content_layers = [25]

In [None]:
# Build a truncated copy of the feature extractor: keep every layer up to and
# including the deepest selected one, and drop the unused layers behind it.
net = nn.Sequential()
last_needed = max(content_layers + style_layers)
for i in range(last_needed + 1):
  net.add(pretrained_net.features[i])

In [None]:
def extract_features(X, content_layers, style_layers):
  """Feed X through ``net`` layer by layer and keep selected outputs.

  Args:
    X: input passed to the first layer of the module-level ``net``.
    content_layers: layer indices whose outputs are collected as contents.
    style_layers: layer indices whose outputs are collected as styles.

  Returns:
    A ``(contents, styles)`` pair of lists, each in increasing layer order.
  """
  contents, styles = [], []
  activation = X
  for idx in range(len(net)):
    # The output of one layer becomes the input of the next.
    activation = net[idx](activation)
    # A layer may be selected for both lists, so the checks are independent.
    if idx in style_layers:
      styles.append(activation)
    if idx in content_layers:
      contents.append(activation)
  return contents, styles


In [None]:
def get_contents(image_shape, ctx):
  """Preprocess the content image and extract its content features.

  Args:
    image_shape: shape forwarded to ``preprocess``.
    ctx: context the preprocessed image is copied to.

  Returns:
    ``(content_X, contents_Y)``: the preprocessed content image on ``ctx``
    and the list of content-layer outputs computed from it.
  """
  content_X = preprocess(content_img, image_shape).copyto(ctx)
  features = extract_features(content_X, content_layers, style_layers)
  # Only the content features (first element of the pair) are needed here.
  contents_Y = features[0]
  return content_X, contents_Y
def get_styles(image_shape, ctx):
  """Preprocess the style image and extract its style features.

  Args:
    image_shape: shape forwarded to ``preprocess``.
    ctx: context the preprocessed image is copied to.

  Returns:
    ``(style_X, styles_Y)``: the preprocessed style image on ``ctx`` and
    the list of style-layer outputs computed from it.
  """
  style_X = preprocess(style_img, image_shape).copyto(ctx)
  features = extract_features(style_X, content_layers, style_layers)
  # Only the style features (second element of the pair) are needed here.
  styles_Y = features[1]
  return style_X, styles_Y


# Loss functions


In [None]:
def content_loss(Y_hat, Y):
  """Return the mean of the squared element-wise difference of Y_hat and Y."""
  difference = Y_hat - Y
  return difference.square().mean()

In [None]:
def gram(X):
  """Return the normalized Gram matrix of a feature map.

  X is reshaped to ``(channels, n)``, where ``channels`` is ``X.shape[1]``
  and ``n`` is the number of remaining elements per channel. The result is
  the product of that matrix with its transpose, divided by
  ``channels * n``. Axis 0 is treated as the batch axis; the reshape folds
  it into ``n`` (the notebook only ever passes a batch of one).
  """
  channels = X.shape[1]
  per_channel = X.size // channels
  features = X.reshape((channels, per_channel))
  return nd.dot(features, features.T) / (channels * per_channel)

In [None]:
def style_loss(Y_hat, gram_Y):
  """Return the mean squared difference between gram(Y_hat) and gram_Y.

  ``gram_Y`` is an already computed Gram matrix; only ``Y_hat`` is passed
  through ``gram`` here.
  """
  gram_difference = gram(Y_hat) - gram_Y
  return gram_difference.square().mean()

In [None]:
def tv_loss(Y_hat):
  """Total variation loss: penalizes differences between adjacent pixels.

  Reduces noise by pulling neighboring pixel values toward each other.
  Averages the absolute difference between neighbors along axis 2 and along
  axis 3 of a 4-axis input, and returns half the sum of the two means.
  """
  along_axis2 = (Y_hat[:, :, 1:, :] - Y_hat[:, :, :-1, :]).abs().mean()
  along_axis3 = (Y_hat[:, :, :, 1:] - Y_hat[:, :, :, :-1]).abs().mean()
  return 0.5 * (along_axis2 + along_axis3)


In [None]:
# Disabled alternative: a per-layer style weight scaled by the layer's channel count.
# channels = [net[l].weight.shape[0] for l in style_layers]
# style_weight = [1e4/n**2 for n in channels] # Adjustable; makes the result match the style's colors more closely.
# The larger the style weight, the closer the result is to the style image.
# Scalar multipliers applied to the three loss terms in compute_loss.
style_weight = 1e4
content_weight = 1
tv_weight = 10 # The larger the value, the smoother the image
def compute_loss(X, contents_Y_hat, styles_Y_hat, contents_Y, styles_Y_gram):
  """Compute the weighted content, style and total variation losses.

  Args:
    X: the generated image; used for the total variation term.
    contents_Y_hat: content-layer outputs for the generated image.
    styles_Y_hat: style-layer outputs for the generated image.
    contents_Y: content-layer outputs for the content image.
    styles_Y_gram: Gram matrices of the style image's style-layer outputs.

  Returns:
    ``(contents_l, styles_l, tv_l, l)``: the list of weighted content
    losses, the list of weighted style losses, the weighted total variation
    loss, and the sum of all of them.
  """
  # Content loss: one weighted term per content layer.
  contents_l = []
  for Y_hat, Y in zip(contents_Y_hat, contents_Y):
    contents_l.append(content_loss(Y_hat, Y) * content_weight)
  # Style loss: one weighted term per style layer.
  styles_l = []
  for Y_hat, gram_Y in zip(styles_Y_hat, styles_Y_gram):
    styles_l.append(style_loss(Y_hat, gram_Y) * style_weight)
  # Total variation loss on the image itself.
  tv_l = tv_loss(X) * tv_weight
  # Sum all loss terms into a single value.
  total = nd.add_n(*styles_l) + nd.add_n(*contents_l) + tv_l
  return contents_l, styles_l, tv_l, total

# Create and initialize the generated image

In [None]:
class GeneratedImage(nn.Block):
  """Block whose single parameter, ``weight``, holds the generated image.

  Wrapping the image in a parameter lets it be collected with
  ``collect_params()`` and updated by an optimizer.
  """

  def __init__(self, img_shape, **kwargs):
    """Register a ``weight`` parameter with shape ``img_shape``."""
    super().__init__(**kwargs)
    self.weight = self.params.get('weight', shape=img_shape)

  def forward(self):
    """Return the current data of the ``weight`` parameter."""
    return self.weight.data()

In [None]:
def get_inits(X, ctx, lr, styles_Y):
  """Create the trainable image, the style Gram matrices and the trainer.

  Args:
    X: array used as the constant initial value (and shape) of the image.
    ctx: context on which the image parameter is initialized.
    lr: learning rate for the Adam trainer.
    styles_Y: style-layer outputs of the style image.

  Returns:
    ``(image, styles_Y_gram, trainer)``: the data of the generated image's
    parameter, the Gram matrix of each entry of ``styles_Y``, and a
    ``gluon.Trainer`` that optimizes the image.
  """
  gen_img = GeneratedImage(X.shape)
  # force_reinit so the parameter always starts from X.
  gen_img.initialize(init.Constant(X), ctx=ctx, force_reinit=True)
  optimizer_params = {'learning_rate': lr}
  trainer = gluon.Trainer(gen_img.collect_params(), 'adam', optimizer_params)
  styles_Y_gram = []
  for Y in styles_Y:
    styles_Y_gram.append(gram(Y))
  return gen_img(), styles_Y_gram, trainer

# Train the model

In [None]:
def train(X, contents_Y, styles_Y, ctx, lr, max_epochs, lr_decay_epoch):
  """Optimize the generated image and display the final result.

  Args:
    X: initial value of the generated image.
    contents_Y: content-layer outputs of the content image.
    styles_Y: style-layer outputs of the style image.
    ctx: context on which the generated image is initialized.
    lr: initial learning rate.
    max_epochs: number of optimization steps.
    lr_decay_epoch: at every non-zero step that is a multiple of this
      value, the learning rate is multiplied by 0.1.

  Returns:
    The optimized image. Progress is printed at every non-zero step that
    is a multiple of 50, and the final image is shown with matplotlib.
  """
  X, styles_Y_gram, trainer = get_inits(X, ctx, lr, styles_Y)
  for epoch in range(max_epochs):
    start = time.time()
    # Record the forward pass so gradients with respect to X are available.
    with autograd.record():
      contents_Y_hat, styles_Y_hat = extract_features(
          X, content_layers, style_layers)
      contents_l, styles_l, tv_l, l = compute_loss(
          X, contents_Y_hat, styles_Y_hat, contents_Y, styles_Y_gram)
    l.backward()
    trainer.step(1)
    # Wait for pending work so the timing below covers the whole step.
    nd.waitall()
    if epoch % 50 == 0 and epoch != 0:
      content_total = nd.add_n(*contents_l).asscalar()
      style_total = nd.add_n(*styles_l).asscalar()
      tv_total = tv_l.asscalar()
      elapsed = time.time() - start
      print('epoch %3d, content loss %.2f, style loss %.2f, ' 'TV loss %.2f, %.2f sec'
            % (epoch, content_total, style_total, tv_total, elapsed))
    if epoch % lr_decay_epoch == 0 and epoch != 0:
      decayed_lr = trainer.learning_rate * 0.1
      trainer.set_learning_rate(decayed_lr)
      print('change lr to %.1e' % trainer.learning_rate)
  # Show the final image once training has finished.
  plt.imshow(postprocess(X).asnumpy())
  plt.show()
  return X

In [None]:
# First pass at low resolution: move the network to the chosen device,
# extract the target features and optimize starting from the content image.
ctx = d2l.try_gpu()
image_shape = (225, 150)
net.collect_params().reset_ctx(ctx)
content_X, contents_Y = get_contents(image_shape, ctx)
_, styles_Y = get_styles(image_shape, ctx)
# lr_decay_epoch (200) exceeds max_epochs (100), so the learning rate is
# never decayed during this pass.
output = train(content_X, contents_Y, styles_Y, ctx,
               lr=0.01, max_epochs=100, lr_decay_epoch=200)

In [None]:
# Second pass at a higher resolution, starting from the low-resolution result.
image_shape = (450, 300)
_, content_Y = get_contents(image_shape, ctx)
_, style_Y = get_styles(image_shape, ctx)
# postprocess returns values clipped to [0, 1]; multiply by 255 because
# preprocess divides its input by 255 again.
X = preprocess(postprocess(output) * 255, image_shape)
output = train(X, content_Y, style_Y, ctx,
               lr=0.01, max_epochs=1000, lr_decay_epoch=300)

In [None]:
# Write the final image to disk as a PNG, after postprocess has undone the
# normalization and clipped the values to [0, 1].
d2l.plt.imsave('neural-style-3.png', postprocess(output).asnumpy())