# 模型训练流程
* 1. Data provider
    * a. Image data
    * b. Random vector
* 2. Build compute graph
    * a. Generator
    * b. Discriminator
    * c. DCGAN class
        * Connect G and D
        * Define loss
        * Define train_op
* 3. Training process

In [1]:
import os
import tensorflow as tf
from tensorflow import logging
from tensorflow import gfile
import sys
import pprint
import numpy as np
import pickle
import random
import math
from PIL import Image
from tensorflow.examples.tutorials.mnist import input_data

  from ._conv import register_converters as _register_converters


In [2]:
# Read the MNIST dataset from ../MNIST_data/ through the TF tutorial helper.
mnist = input_data.read_data_sets('../MNIST_data/', one_hot=True)  # load the data
output_dir = './local_run'  # path of the output directory
if not gfile.Exists(output_dir):
    gfile.MakeDirs(output_dir)  # create the output directory

def get_default_params():
    """Build the default hyper-parameter set used by this DCGAN run.

    Returns:
        A ``tf.contrib.training.HParams`` object holding the values below.
    """
    defaults = dict(
        z_dim=100,  # length of the random noise vector
        init_conv_size=4,  # noise is first projected to a 4x4 feature map
        g_channels=[128, 64, 32, 1],  # channel counts used by the generator
        d_channels=[32, 64, 128, 256],  # channel counts of the discriminator conv layers
        batch_size=128,  # examples per batch
        learning_rate=0.002,
        beta1=0.5,  # Adam beta1
        img_size=32,  # images are 32x32
    )
    return tf.contrib.training.HParams(**defaults)
# Module-level hyper-parameters shared by the cells below.
hparams = get_default_params()


Extracting ../MNIST_data/train-images-idx3-ubyte.gz
Extracting ../MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
# data provider
class MnistData(object):
    """In-memory batch provider pairing MNIST images with fixed noise vectors.

    Args:
        mnist_train: array of flattened images. It is multiplied by 255 and
            reshaped to ``[example_num, 28, 28]``, so it is assumed to hold
            784 values per example normalized to the 0..1 range.
        z_dim: length of the random vector generated for every example.
        img_size: side length every image is resized to.
    """

    def __init__(self, mnist_train, z_dim, img_size):
        self._data = mnist_train
        self._example_num = len(self._data)
        # One standard-normal noise vector per training example.
        self._z_data = np.random.standard_normal((self._example_num, z_dim))
        # Start offset of the next batch.
        self._indicator = 0
        # Resize the images first, then shuffle images and noise together.
        self._resize_mnist_img(img_size)
        self._random_shuffle()

    def _random_shuffle(self):
        """Apply one shared random permutation to the images and the noise."""
        order = np.random.permutation(self._example_num)
        self._data = self._data[order]
        self._z_data = self._z_data[order]

    def _resize_mnist_img(self, img_size):
        """Resize every image to ``img_size`` x ``img_size`` and rescale to [-1, 1].

        After this call ``self._data`` has shape
        ``[example_num, img_size, img_size, 1]`` and dtype float32.
        """
        # 1. Restore integer grey levels (0..255) so PIL can consume the data.
        pixels = np.asarray(self._data * 255, np.uint8)
        # 2. [example_num, 784] -> [example_num, 28, 28]
        pixels = pixels.reshape((self._example_num, 28, 28))
        # 3. numpy -> PIL -> resize -> numpy, adding the trailing channel axis
        #    that the discriminator's convolutions need.
        resized = [
            np.asarray(Image.fromarray(img).resize((img_size, img_size)))
            .reshape((img_size, img_size, 1))
            for img in pixels
        ]
        # `resized` is a Python list of arrays; stack it into one float array.
        new_data = np.asarray(resized, dtype=np.float32)
        # Map 0..255 to -1..1 to match the generator's tanh output range.
        self._data = new_data / 127.5 - 1

    def next_batch(self, batch_size):
        """Return the next ``batch_size`` images and their noise vectors.

        When fewer than ``batch_size`` examples remain, the data is
        reshuffled and the batch is taken from the start (the unused tail of
        the previous pass is skipped).

        Returns:
            Tuple ``(batch_data, batch_z)``.

        Raises:
            ValueError: if ``batch_size`` exceeds the number of examples.
        """
        if batch_size > self._example_num:
            raise ValueError(
                'batch_size (%d) exceeds the number of examples (%d)'
                % (batch_size, self._example_num))

        end_indicator = self._indicator + batch_size
        if end_indicator > self._example_num:
            # Not enough examples left: reshuffle and restart from the top.
            self._random_shuffle()
            self._indicator = 0
            end_indicator = batch_size
        # A batch may end exactly on the last example (end == example_num);
        # the slice end is exclusive, so that is still in range.

        batch_data = self._data[self._indicator:end_indicator]
        batch_z = self._z_data[self._indicator:end_indicator]
        # Advance the cursor for the next call.
        self._indicator = end_indicator
        return batch_data, batch_z

# Build the provider from the training images and pull one small batch.
mnist_data = MnistData(mnist.train.images, hparams.z_dim, hparams.img_size)
# NOTE: this call advances the provider's batch cursor by 5 before training starts.
batch_data, batch_z = mnist_data.next_batch(5)

In [4]:
# generator 
def conv2d_transpose(inputs, out_channel, name, training, with_bn_relu=True):
    """Wrapper around ``tf.layers.conv2d_transpose`` (5x5 kernel, stride 2).

    Args:
        inputs: input tensor for the transposed convolution.
        out_channel: number of output channels of this layer.
        name: variable scope the layer's variables are created under, so the
            parameter is honoured the same way as in ``conv2d``.
        training: passed to batch normalization as its ``training`` switch.
        with_bn_relu: when True (default) the output goes through batch
            normalization and ReLU; pass False for the generator's output
            layer, which is activated with tanh by the caller instead.

    Returns:
        The layer output tensor.
    """
    with tf.variable_scope(name):
        conv2d_trans = tf.layers.conv2d_transpose(inputs,
                                                  out_channel,
                                                  [5, 5],
                                                  strides=(2, 2),
                                                  padding='SAME')
        if not with_bn_relu:
            return conv2d_trans
        bn = tf.layers.batch_normalization(conv2d_trans, training=training)
        return tf.nn.relu(bn)
    
# discriminator 
def conv2d(inputs, out_channel, name, training):
    """Wrapper around ``tf.layers.conv2d`` followed by batch norm and leaky ReLU.

    Args:
        inputs: input tensor for the convolution.
        out_channel: number of output channels of this layer.
        name: variable scope the layer is built under.
        training: passed to batch normalization as its ``training`` switch.

    Returns:
        The activated output tensor (op name ``outputs`` inside the scope).
    """
    def leaky_relu(x, leak=0.2, name=' '):
        # Leaky ReLU with slope `leak` on the negative side.
        scaled = x * leak
        return tf.maximum(x, scaled, name=name)

    with tf.variable_scope(name):
        # Stride 2 with SAME padding halves the spatial size at every layer.
        features = tf.layers.conv2d(inputs,
                                    filters=out_channel,
                                    kernel_size=[5, 5],
                                    strides=(2, 2),
                                    padding='SAME')
        normalized = tf.layers.batch_normalization(features, training=training)
        activated = leaky_relu(normalized, name='outputs')
    return activated
        
class Generator(object):
    """Callable that builds the generator sub-graph under scope ``generator``.

    Args:
        channels: channel counts; ``channels[0]`` sizes the initial feature
            map, the remaining entries are the transposed-conv layers.
        init_conv_size: side length of the initial feature map.
    """

    def __init__(self, channels, init_conv_size):
        self._channels = channels
        self._init_conv_size = init_conv_size
        # Flipped to True after the first call so later calls reuse variables.
        self._reuse = False

    def __call__(self, inputs, training):
        """Map noise vectors to images with values in [-1, 1] (tanh output).

        Args:
            inputs: noise input, converted to a tensor.
            training: batch-normalization ``training`` switch.

        Returns:
            The generated image tensor.
        """
        noise = tf.convert_to_tensor(inputs)
        side = self._init_conv_size
        with tf.variable_scope('generator', reuse=self._reuse):
            # Step 1: dense projection of the noise vector to
            #         channels[0] * init_conv_size ** 2 values.
            # Step 2: reshape that vector into an image-like feature map.
            with tf.variable_scope('inputs_conv'):
                first_channels = self._channels[0]
                projected = tf.layers.dense(noise, first_channels * side * side)
                # The leading -1 is the batch dimension.
                feature_map = tf.reshape(
                    projected, [-1, side, side, first_channels])
                feature_map = tf.layers.batch_normalization(
                    feature_map, training=training)
                feature_map = tf.nn.relu(feature_map)

            # Transposed convolutions start at channels[1]; the last one is
            # the output layer and therefore skips batch norm and ReLU.
            last_index = len(self._channels) - 1
            for idx, out_channel in enumerate(self._channels[1:], start=1):
                feature_map = conv2d_transpose(
                    feature_map,
                    out_channel,
                    'deconv-%d' % idx,
                    training,
                    idx != last_index)

            # tanh on the last layer yields pixel values in [-1, 1].
            with tf.variable_scope('generate_imgs'):
                imgs = tf.tanh(feature_map, name='imgs')

        self._reuse = True
        # G and D are optimized separately, so keep G's own variable list.
        self.variables = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
        return imgs
    
class Discriminator(object):
    """Callable that builds the discriminator sub-graph under ``discriminator``.

    Args:
        channels: output channel count of each convolution layer.
    """

    def __init__(self, channels):
        self._channels = channels
        # Flipped to True after the first call so later calls reuse variables.
        self._reuse = False

    def __call__(self, inputs, training):
        """Return two-class logits for a batch of images.

        Args:
            inputs: image batch, converted to a float32 tensor.
            training: batch-normalization ``training`` switch.

        Returns:
            Logits tensor with 2 units per example.
        """
        features = tf.convert_to_tensor(inputs, dtype=tf.float32)
        with tf.variable_scope('discriminator', reuse=self._reuse):
            # Convolve the image, flatten, then project to 2 logits.
            for idx, out_channel in enumerate(self._channels):
                features = conv2d(features, out_channel, 'conv-%d' % idx, training)
            with tf.variable_scope('fc'):
                flat = tf.layers.flatten(features)
                # Two outputs: one per class (generated vs. real image).
                logits = tf.layers.dense(flat, 2, name='logits')
        self._reuse = True
        # Keep D's own variable list for its optimizer.
        self.variables = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminator')
        return logits

# connect generator and discriminator
class DCGAN(object):
    """Connects the generator and discriminator and defines losses/train op.

    Args:
        hps: hyper-parameter object providing ``g_channels``, ``d_channels``,
            ``batch_size``, ``init_conv_size``, ``z_dim`` and ``img_size``.
    """

    def __init__(self, hps):
        g_channels = hps.g_channels  # generator channel counts
        d_channels = hps.d_channels  # discriminator channel counts

        self._batch_size = hps.batch_size
        self._init_conv_size = hps.init_conv_size
        self._z_dim = hps.z_dim
        self._img_size = hps.img_size

        self._generator = Generator(g_channels, self._init_conv_size)
        self._discriminator = Discriminator(d_channels)

    @staticmethod
    def _mean_cross_entropy(logits, labels):
        """Mean sparse softmax cross entropy of ``logits`` against ``labels``."""
        return tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=labels, logits=logits))

    def build(self):
        """Build the compute graph.

        Returns:
            Tuple ``(z_placeholder, img_placeholder, generated_imgs, loss)``
            where ``loss`` is a dict with keys ``'g'`` and ``'d'``.
        """
        # Placeholders for the noise vectors and for the real images.
        self._z_placeholder = tf.placeholder(
            tf.float32, (self._batch_size, self._z_dim))
        self._img_placeholder = tf.placeholder(
            tf.float32,
            (self._batch_size, self._img_size, self._img_size, 1))

        generated_imgs = self._generator(self._z_placeholder, training=True)

        # Discriminator logits for generated and for real images.
        fake_img_logits = self._discriminator(generated_imgs, training=True)
        real_img_logits = self._discriminator(self._img_placeholder, training=True)

        # Every example in a batch shares the same label, so sparse integer
        # labels suffice: 1 = real, 0 = generated.
        real_labels = tf.ones([self._batch_size], dtype=tf.int64)
        fake_labels = tf.zeros([self._batch_size], dtype=tf.int64)

        # Generator: wants its images to be classified as real (1).
        loss_on_fake_to_real = self._mean_cross_entropy(
            fake_img_logits, real_labels)
        # Discriminator: real images -> 1, generated images -> 0.
        loss_on_real_to_real = self._mean_cross_entropy(
            real_img_logits, real_labels)
        loss_on_fake_to_fake = self._mean_cross_entropy(
            fake_img_logits, fake_labels)

        tf.add_to_collection('g_losses', loss_on_fake_to_real)
        tf.add_to_collection('d_losses', loss_on_real_to_real)
        tf.add_to_collection('d_losses', loss_on_fake_to_fake)

        # Total loss per network.
        loss = {
            'g': tf.add_n(tf.get_collection('g_losses'),
                          name='total_g_loss'),
            'd': tf.add_n(tf.get_collection('d_losses'),
                          name='total_d_loss')
        }

        return (self._z_placeholder,
                self._img_placeholder,
                generated_imgs,
                loss)

    def build_train_op(self, losses, learning_rate, beta1):
        """Create the combined train op; must be called after ``build``.

        Args:
            losses: dict with keys ``'g'`` and ``'d'`` as returned by ``build``.
            learning_rate: Adam learning rate used for both networks.
            beta1: Adam ``beta1`` hyper-parameter.

        Returns:
            A no-op named ``train`` that depends on both optimizer steps and
            on the batch-normalization update ops.
        """
        # Each network gets its own optimizer and its own variable list.
        g_opt = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                       beta1=beta1)
        d_opt = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                       beta1=beta1)

        g_opt_op = g_opt.minimize(
            losses['g'], var_list=self._generator.variables)
        d_opt_op = d_opt.minimize(
            losses['d'], var_list=self._discriminator.variables)

        # tf.layers.batch_normalization registers its moving-average updates
        # in UPDATE_OPS; they only run if something depends on them.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        # Both optimizer steps run in the same session call: every training
        # step updates the generator and the discriminator once each.
        with tf.control_dependencies([g_opt_op, d_opt_op] + update_ops):
            return tf.no_op(name='train')
        
# Instantiate the model, build its graph, then create the combined train op
# (build_train_op must run after build).
dcgan = DCGAN(hparams)
z_placeholder, img_placeholder, generated_imgs, losses = dcgan.build()
train_op = dcgan.build_train_op(losses, hparams.learning_rate, hparams.beta1)

In [5]:
# training process
def combine_imgs(batch_imgs, img_size, rows=8, cols=16):
    """Tile the first ``rows * cols`` images of a batch into one grid image.

    Args:
        batch_imgs: indexable batch of images; each is reshaped to
            ``(img_size, img_size)``, e.g. ``[batch_size, img_size, img_size, 1]``.
        img_size: side length of every single image.
        rows: number of grid rows.
        cols: number of images per grid row.

    Returns:
        A PIL image of the tiled grid.
    """
    def to_pixels(img):
        # Drop the channel axis and map values from -1..1 back to 0..255.
        return (img.reshape((img_size, img_size)) + 1) * 127.5

    # Join each row's images side by side, then stack the rows vertically.
    grid_rows = [
        np.hstack([to_pixels(batch_imgs[r * cols + c]) for c in range(cols)])
        for r in range(rows)
    ]
    canvas = np.vstack(grid_rows)
    # Integer pixel type is needed to build the PIL image.
    canvas = np.asarray(canvas, np.uint8)
    return Image.fromarray(canvas)

# Training script: initialise variables, then run `train_steps` steps,
# periodically logging the losses and saving image grids to `output_dir`.
init_op = tf.global_variables_initializer()
# Run 10000 training steps.
train_steps = 10000

with tf.Session() as sess:
    sess.run(init_op)
    for step in range(train_steps):
        # Fetch the training data for this step first.
        batch_imgs, batch_z = mnist_data.next_batch(hparams.batch_size)
        # Things evaluated on every step: the train op and both losses.
        fetches = [train_op, losses['g'], losses['d']]
        # Save generated images every 50 steps.
        should_sample = (step + 1) % 50 == 0
        # Log training information every 500 steps.
        is_show_message = (step + 1) % 500 == 0
        if should_sample:
            fetches += [generated_imgs]  # also fetch the generated images on sampling steps
        output_values = sess.run(fetches, feed_dict={
            z_placeholder: batch_z,
            img_placeholder: batch_imgs
        })
        # Unpack the training information.
        _, g_loss_val, d_loss_val = output_values[0 : 3]
        if is_show_message:
            # NOTE: the logged value is the zero-based `step`, while the
            # saved file names below use `step + 1`.
            logging.info('step: %4d, g_loss: %4.3f, d_loss: %4.3f'
                    % (step, g_loss_val, d_loss_val))
        
        # Save the results on every sampling step.
        if should_sample:
            gen_imgs_val = output_values[3]  # the generated images
            # Paths of the generated-image grid and the real-image grid.
            gen_img_path = os.path.join(output_dir, '%05d-gen.jpg' % (step + 1))
            raw_img_path = os.path.join(output_dir, '%05d-raw.jpg' % (step + 1))
            # Tile the generated images and the current batch of real images.
            gen_img = combine_imgs(gen_imgs_val, hparams.img_size)
            raw_img = combine_imgs(batch_imgs, hparams.img_size)
            # Write both grids to disk.
            gen_img.save(gen_img_path)
            raw_img.save(raw_img_path)

INFO:tensorflow:step:  499, g_loss: 0.869, d_loss: 0.786
INFO:tensorflow:step:  999, g_loss: 0.766, d_loss: 1.005
INFO:tensorflow:step: 1499, g_loss: 1.590, d_loss: 0.669
INFO:tensorflow:step: 1999, g_loss: 1.333, d_loss: 0.797
INFO:tensorflow:step: 2499, g_loss: 2.912, d_loss: 0.499
INFO:tensorflow:step: 2999, g_loss: 1.642, d_loss: 0.377
INFO:tensorflow:step: 3499, g_loss: 3.215, d_loss: 0.607
INFO:tensorflow:step: 3999, g_loss: 3.222, d_loss: 0.471
INFO:tensorflow:step: 4499, g_loss: 3.852, d_loss: 0.086
INFO:tensorflow:step: 4999, g_loss: 1.802, d_loss: 0.434
INFO:tensorflow:step: 5499, g_loss: 3.792, d_loss: 0.047
INFO:tensorflow:step: 5999, g_loss: 5.662, d_loss: 0.200
INFO:tensorflow:step: 6499, g_loss: 4.529, d_loss: 0.363
INFO:tensorflow:step: 6999, g_loss: 3.421, d_loss: 0.268
INFO:tensorflow:step: 7499, g_loss: 4.430, d_loss: 0.027
INFO:tensorflow:step: 7999, g_loss: 4.151, d_loss: 0.049
INFO:tensorflow:step: 8499, g_loss: 5.659, d_loss: 0.037
INFO:tensorflow:step: 8999, g_l

### 训练结果
* 训练500次后生成的图像 
    * 训练图像 
        ![img](./local_run/00500-raw.jpg)  
    * 生成图像   
        ![img](./local_run/00500-gen.jpg)  

* 训练5000次的生成图像   
    ![img](./local_run/05000-gen.jpg)
* 训练9000次的生成图像   
    ![img](./local_run/09000-gen.jpg)