## "Texture Synthesis Using Convolutional Neural Networks" - TensorFlow 实现

### Summary
实现分为4步
- 预处理图像
- 自定义网络结构，设置权重
- 计算损失函数
- 训练并生成结果

In [1]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from functools import reduce
from PIL import Image

In [None]:
def load_img(path_to_img):
  """Read an image file and return it as a float32 batch of one image.

  The file is decoded with three channels, converted to float32 with
  tf.image.convert_image_dtype, and resized so that its longer side becomes
  max_dim (512) while the aspect ratio is kept. The size cap keeps training
  and testing manageable.

  Args:
    path_to_img: path of the image file, handed to tf.io.read_file.

  Returns:
    The resized image tensor with a new leading axis of size 1.
  """
  max_dim = 512
  img = tf.io.read_file(path_to_img)
  img = tf.image.decode_image(img, channels=3)
  img = tf.image.convert_image_dtype(img, tf.float32)

  # Spatial dimensions only (everything but the trailing channel axis).
  shape = tf.cast(tf.shape(img)[:-1], tf.float32)
  # tf.reduce_max keeps this a tensor op; the builtin max() would iterate the
  # tensor element by element in Python, which only works in eager mode.
  long_dim = tf.reduce_max(shape)
  scale = max_dim / long_dim

  new_shape = tf.cast(shape * scale, tf.int32)

  img = tf.image.resize(img, new_shape)
  img = img[tf.newaxis, :]
  return img

### 设置初始权重，本实现只使用了style_weight，其他可以后续测试时额外添加

In [None]:
# Scale applied to the summed per-layer loss inside style_loss.
style_weight=1
# Weight for the total-variation term; only referenced from commented-out code in train_step.
total_variation_weight=1e-3
# Exponent applied to norm_loss inside noise_loss.
norm_term = 6
# Multiplier applied to the powered norm inside noise_loss.
norm_weight = 0.1

### 第一步 图像预处理
读取图片，并生成初始噪声图像
vgg所需要的图像格式在模型内部处理，不在此处实现

In [None]:
style_path = "texture_5.png"#path of the texture image to read
style_image = load_img(style_path)#load it as a tensor
# NOTE(review): N(0, 1) samples fall outside [0, 1]; train_step clips the variable only after each update — confirm this starting point is intended.
image = tf.Variable(tf.random.normal(shape=style_image.shape,mean=0,stddev=1))  #initial noise image with the same shape as the texture image


### 第二步 自定义网络模型
- 将vgg19中的最大池化改为平均池化，可以使结果更加平滑
- 根据所需要的层提取出只有指定层输出的新模型

In [None]:
def setAvePolConfig(configDict):
  """Rewrite one layer-config entry in place so it describes average pooling.

  Sets the 'class_name' key to 'AveragePooling2D' and the 'padding' key to
  'same' directly on configDict; nothing is returned.

  NOTE(review): 'padding' is written at the top level of the entry. Keras
  layer configs usually keep padding in a nested 'config' dict, so this key
  may have no effect — confirm.
  """
  overrides = (
      ('class_name', 'AveragePooling2D'),
      ('padding', 'same'),
  )
  for key, value in overrides:
    configDict[key] = value

In [None]:
def creatModel():
    """Build a copy of VGG19 whose config was patched by setAvePolConfig.

    Loads VGG19 without its top, with ImageNet weights, marks it
    non-trainable, rewrites five entries of its layer config, rebuilds a
    model from the patched config, and copies the original weights into it.

    Returns:
        The rebuilt tf.keras.Model carrying the VGG19 weights.
    """
    base = tf.keras.applications.VGG19(include_top=False, weights='imagenet')
    base.trainable = False

    cfg = base.get_config()
    layer_cfgs = cfg['layers']
    # Positions of the config entries to rewrite; presumably the five pooling
    # layers of VGG19 — verify against base.layers.
    for position in (3, 6, 11, 16, 21):
        setAvePolConfig(layer_cfgs[position])

    rebuilt = tf.keras.Model.from_config(cfg)
    rebuilt.set_weights(base.get_weights())
    return rebuilt

In [None]:
def vgg_layers(layer_names,vgg):
  """Return a model that maps vgg's input to the outputs of the named layers.

  Args:
    layer_names: iterable of layer names looked up with vgg.get_layer.
    vgg: the model whose input and intermediate outputs are reused.

  Returns:
    A tf.keras.Model with one output per entry of layer_names, in order.
  """
  selected_outputs = []
  for layer_name in layer_names:
    selected_outputs.append(vgg.get_layer(layer_name).output)
  return tf.keras.Model([vgg.input], selected_outputs)

In [None]:
custom_vgg = creatModel() #custom VGG model
style_layers = [layer.name for layer in custom_vgg.layers][1:] #every layer except the first (the input layer, per the original note) contributes to the style loss
num_style_layers = len(style_layers)  # used by style_loss to average over layers

In [None]:
# Plain multi-output extractor over the same layers, fed the texture image scaled by 255.
# NOTE(review): style_outputs is not read again in the visible code; the targets used for the loss come from StyleModel instead.
style_extractor = vgg_layers(style_layers,custom_vgg)
style_outputs = style_extractor(style_image*255)

### 第三步 获取噪声图像的输出，计算损失函数
单层损失函数设置为 $L_l = \mathrm{mean}\big((G_l - \hat{G}_l)^2\big)$，即该层生成图像与纹理图像的格拉姆矩阵之差的平方的均值

总损失函数为各层 $L_l$ 之和，再乘以 style_weight / 层数

对损失函数进行梯度下降，不断更新像素值，就能得到目标的纹理结果

In [None]:
def gram_matrix(input_tensor):
  """Compute the Gram matrix of a rank-4 feature tensor.

  The einsum contracts the second and third axes, giving for every batch
  entry a matrix of products between pairs of last-axis channels. The result
  is divided by the product of the sizes of those two contracted axes.

  Args:
    input_tensor: rank-4 tensor indexed as 'bijc' by the einsum.

  Returns:
    Tensor indexed as 'bcd'.
  """
  dims = tf.shape(input_tensor)
  position_count = tf.cast(dims[1]*dims[2], tf.float32)
  channel_products = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)
  return channel_products/position_count

In [None]:
def style_loss(outputs):
    """Texture/style loss between the current image and the texture targets.

    For every layer name in outputs['style'] the mean squared difference
    between that entry and the matching entry of the module-level
    style_targets is taken; the per-layer values are summed and scaled by
    style_weight / num_style_layers.

    Args:
        outputs: dict with a 'style' key mapping layer names to tensors.

    Returns:
        The scalar loss tensor.
    """
    generated = outputs['style']
    per_layer = [
        tf.reduce_mean((generated[layer] - style_targets[layer])**2)
        for layer in generated
    ]
    total = tf.add_n(per_layer)
    total *= style_weight / num_style_layers
    return total

In [None]:
'''获取所需要的各层输出'''
class StyleModel(tf.keras.models.Model):
  """Keras model that maps an image to one Gram matrix per selected layer."""

  def __init__(self, style_layers,model):
    """Wrap `model` in a frozen multi-output extractor for `style_layers`.

    Args:
      style_layers: names of the layers whose outputs are collected.
      model: the network those layers are looked up in (via vgg_layers).
    """
    super().__init__()
    self.vgg = vgg_layers(style_layers, model)
    self.style_layers = style_layers
    self.num_style_layers = len(style_layers)
    self.vgg.trainable = False

  def call(self, inputs):
    """Return {'style': {layer_name: gram_matrix}}; expects float input in [0,1]."""
    # Rescale by 255 before handing the image to the VGG19 preprocessing.
    scaled = inputs*255.0
    features = self.vgg(tf.keras.applications.vgg19.preprocess_input(scaled))

    grams = [gram_matrix(feature)
             for feature in features[:self.num_style_layers]]

    return {'style': dict(zip(self.style_layers, grams))}

In [None]:
# Gram-matrix extractor over every selected layer of the custom VGG; called by train_step.
extractor = StyleModel(style_layers,custom_vgg)

In [None]:
# Gram matrices of the texture image; style_loss compares against these.
style_targets = extractor(style_image)['style']

In [None]:
### 第四步，开启训练
论文中推荐使用LBFGS算法，但因为调用困难，此次选择Adam优化

In [None]:
# Adam optimizer used by train_step to update the image pixels (the surrounding notes say the paper recommends L-BFGS).
opt = tf.optimizers.Adam(learning_rate=0.2, beta_1=0.99, epsilon=1e-1)

In [None]:
# Mean metric that train_step feeds with each step's loss value.
train_loss = tf.keras.metrics.Mean(name='train_loss')

In [None]:
def clip_0_1(image):
  """Clamp every value of image into the closed range [0.0, 1.0]."""
  lower, upper = 0.0, 1.0
  return tf.clip_by_value(image, clip_value_min=lower, clip_value_max=upper)

In [None]:
@tf.function()
def train_step(image):
  """Run one optimisation step on the image variable.

  Computes the style loss of the current image, applies its gradient with
  respect to the image through the module-level optimizer, clips the image
  back into [0, 1], and records the loss in the train_loss metric.

  Args:
    image: the tf.Variable being optimised; it is updated in place.
  """
  with tf.GradientTape() as recorder:
    features = extractor(image)
    step_loss = style_loss(features)
    # Optional extra terms, currently disabled:
    #step_loss = style_loss(features)+noise_loss(image)  (norm of the image itself)
    #step_loss += total_variation_weight*total_variation_loss(image)  (total-variation loss)
  pixel_grad = recorder.gradient(step_loss, image)
  opt.apply_gradients([(pixel_grad, image)])
  # The extractor expects inputs in [0, 1], so restrict the range after each update.
  image.assign(clip_0_1(image))
  train_loss(step_loss)

In [None]:
## Run the optimisation; per the original note, roughly 6000-7000 steps usually give a satisfying result for simple textures
import time  # not referenced by the visible code

epochs = 70
steps_per_epoch = 100

step = 0  # total number of train_step calls made so far
for n in range(epochs):
  for m in range(steps_per_epoch):
    step += 1
    train_step(image)
    print(".", end='')
  # Once per epoch: show the current image and print the metric value.
  plt.imshow(image.read_value()[0])
  plt.title("Train step: {}".format(step))
  # NOTE(review): train_loss is never reset in the visible code, so this presumably reports an average over all steps so far — confirm.
  print(train_loss.result())
  plt.show()


In [None]:
'''张量转化为图片'''
import PIL
def tensor_to_image(tensor):
  """Convert an image tensor scaled to [0, 1] into a PIL image.

  Args:
    tensor: array-like image with values expected in [0, 1]; either a single
      image or, when it has more than three dimensions, a batch holding
      exactly one image.

  Returns:
    The PIL image built from the 8-bit pixel data.

  Raises:
    ValueError: if a batched input holds more than one image.
  """
  pixels = np.array(tensor*255)
  # Clamp before the uint8 cast: floats outside [0, 255] would otherwise wrap
  # around and produce garbage pixels (e.g. for an image that was never clipped).
  pixels = np.clip(pixels, 0, 255).astype(np.uint8)
  if np.ndim(pixels)>3:
    # Explicit check instead of assert, which is stripped under `python -O`.
    if pixels.shape[0] != 1:
      raise ValueError(
          "expected a batch of exactly one image, got {}".format(pixels.shape[0]))
    pixels = pixels[0]
  return PIL.Image.fromarray(pixels)

In [None]:
# Convert the optimised image variable into a PIL image.
out_file = tensor_to_image(image)

In [None]:
# Write the result to out.jpg in the current working directory.
out_file.save("out.jpg")

In [None]:
### Histogram matching; not used elsewhere in this file yet
def hist_match(img,ref):
  """Match the per-channel intensity histogram of img to that of ref.

  For every channel, each intensity level present in img is mapped to the
  level of ref whose cumulative frequency is closest (the lowest such level
  on ties).

  Args:
    img: array of shape (rows, cols, channels) holding integer-valued
      intensities in 0..255.
    ref: reference array with the same number of channels; its spatial size
      may differ from img.

  Returns:
    Array shaped and typed like img with remapped intensities. Pixels whose
    value is not an integer level in 0..255 are left at 0.
  """
  out = np.zeros_like(img)
  _, _, channel_count = img.shape
  for ch in range(channel_count):
    src = img[:, :, ch]
    # Fix the bin edges to 0..256 so that bin j counts exactly intensity j;
    # the default range is the data's min/max, which misaligns bins and levels.
    hist_img, _ = np.histogram(src, bins=256, range=(0, 256))
    hist_ref, _ = np.histogram(ref[:, :, ch], bins=256, range=(0, 256))
    # Normalise both CDFs so images with different pixel counts are comparable.
    cdf_img = np.cumsum(hist_img) / max(int(hist_img.sum()), 1)
    cdf_ref = np.cumsum(hist_ref) / max(int(hist_ref.sum()), 1)
    # lookup[j] = first reference level whose CDF is closest to cdf_img[j].
    lookup = np.abs(cdf_img[:, np.newaxis] - cdf_ref[np.newaxis, :]).argmin(axis=1)
    # Only levels that actually occur in the channel need to be written.
    for level in np.flatnonzero(hist_img):
      out[:, :, ch][src == level] = lookup[level]
  return out

In [None]:
def high_pass_x_y(image):
  """Return neighbouring-element differences of a rank-4 array.

  Args:
    image: rank-4 array; assumes (batch, height, width, channels) layout —
      TODO confirm against callers.

  Returns:
    Tuple (x_var, y_var): differences between adjacent entries along the
    third axis and along the second axis, respectively.
  """
  along_axis2 = image[:, :, 1:, :] - image[:, :, :-1, :]
  along_axis1 = image[:, 1:, :, :] - image[:, :-1, :, :]
  return along_axis2, along_axis1

In [None]:
def total_variation_loss(image):
  """Total-variation loss of an image.

  Sums the mean squared neighbouring differences returned by high_pass_x_y
  for its two directions.

  Args:
    image: rank-4 image tensor accepted by high_pass_x_y.

  Returns:
    Scalar tensor.
  """
  first_deltas, second_deltas = high_pass_x_y(image)
  first_term = tf.reduce_mean(first_deltas**2)
  second_term = tf.reduce_mean(second_deltas**2)
  return first_term + second_term

In [None]:
def norm_loss(diffs):
    """Noise loss of the target image: sum of squares over squared element count.

    Args:
        diffs: tensor exposing get_shape().as_list(), i.e. a static shape.

    Returns:
        tf.reduce_sum(diffs ** 2) divided by (number of elements) ** 2.
    """
    dims = diffs.get_shape().as_list()
    # Multiply the dimensions together to get the element count.
    element_count = reduce(lambda acc, dim: acc * dim, dims)
    squared_total = tf.reduce_sum(tf.square(diffs))
    return squared_total / (element_count ** 2)

In [None]:
def noise_loss(X):
    """Return norm_loss(X) raised to norm_term and scaled by norm_weight.

    Both norm_term and norm_weight are module-level settings.
    """
    powered = norm_loss(X) ** norm_term
    return powered * norm_weight