In [1]:
import  tensorflow as tf
from    tensorflow import keras
from    tensorflow.keras import layers, Model, losses
from dataset import make_anime_dataset
# from scipy.misc import toimage 
# misc模块在 1.10.0弃用，会在2.0.0完全废除，用image.fromarray()代替
from PIL import Image #Python Image Library
import glob
import numpy as np

生成器由 5 个转置卷积层组成，逐层放大特征图的高和宽，同时逐层减少特征图的通道数。

- 将长度为 100 的隐藏变量 $z$ 变换为 [b, 1, 1, 100]
- 依次通过转置卷积层，放大高宽维度，减少通道维度，最后高宽为 64×64，通道为 3
- 除最后一层外，每个转置卷积层之后都插入 BN 层以提高训练稳定性；所有转置卷积层均不使用偏置向量

<img src="img/01.jpg"/>

In [2]:
# Generator network.
class Generator(keras.Model):
    """DCGAN-style generator: latent vectors -> RGB images.

    Input:  [b, z_dim] latent vectors.
    Output: [b, 64, 64, 3] images with values in [-1, 1] (tanh output).

    Five transposed convolutions grow the spatial size
    1x1 -> 4x4 -> 8x8 -> 16x16 -> 32x32 -> 64x64 while the channel count
    shrinks 512 -> 256 -> 128 -> 64 -> 3. The first four are each followed
    by batch normalization and ReLU; none of the convolutions uses a bias.
    """

    def __init__(self):
        super(Generator, self).__init__()
        # Base channel width (named so it does not shadow the built-in `filter`).
        base_filters = 64
        # Layer 1: base_filters*8 output channels, kernel 4, stride 1,
        # no padding ('valid'), no bias.
        self.conv1 = layers.Conv2DTranspose(base_filters * 8, 4, 1, 'valid', use_bias=False)
        self.bn1 = layers.BatchNormalization()
        # Layer 2: kernel 4, stride 2, 'same' padding doubles height/width.
        self.conv2 = layers.Conv2DTranspose(base_filters * 4, 4, 2, 'same', use_bias=False)
        self.bn2 = layers.BatchNormalization()
        # Layer 3
        self.conv3 = layers.Conv2DTranspose(base_filters * 2, 4, 2, 'same', use_bias=False)
        self.bn3 = layers.BatchNormalization()
        # Layer 4
        self.conv4 = layers.Conv2DTranspose(base_filters * 1, 4, 2, 'same', use_bias=False)
        self.bn4 = layers.BatchNormalization()
        # Layer 5: projects to 3 output channels; no BN after it.
        self.conv5 = layers.Conv2DTranspose(3, 4, 2, 'same', use_bias=False)

    def call(self, inputs, training = None):
        """Generate images from latent vectors.

        Args:
            inputs: tensor of shape [b, z_dim].
            training: forwarded to the batch-normalization layers.

        Returns:
            Tensor produced by the final transposed convolution followed by tanh.
        """
        # [b, z_dim] -> [b, 1, 1, z_dim]. Using -1 lets TensorFlow infer the
        # batch size, so the model also works when the static batch dimension
        # is unknown (None), e.g. build(input_shape=(None, z_dim)).
        x = tf.reshape(inputs, (-1, 1, 1, inputs.shape[1]))
        # Activation applied to the reshaped latent tensor.
        x = tf.nn.relu(x)

        # Transposed conv -> BN -> ReLU, four times.
        x = tf.nn.relu(self.bn1(self.conv1(x), training=training))
        x = tf.nn.relu(self.bn2(self.conv2(x), training=training))
        x = tf.nn.relu(self.bn3(self.conv3(x), training=training))
        x = tf.nn.relu(self.bn4(self.conv4(x), training=training))

        x = self.conv5(x)
        # Squash the output into the range -1..1.
        x = tf.tanh(x)

        return x
        

In [3]:
# Discriminator: an ordinary binary classification network.
class Discriminator(Model):
    """Convolutional classifier that scores images as real or fake.

    Five Conv2D -> BatchNorm -> LeakyReLU stages are followed by global
    average pooling and a single-unit dense layer. The output is a raw
    logit of shape [b, 1]; no sigmoid is applied here.
    """

    def __init__(self):
        super(Discriminator, self).__init__()
        width = 64
        # Stages 1-3: 4x4 kernels, stride 2, 'valid' padding, no bias.
        self.conv1 = layers.Conv2D(width, 4, 2, 'valid', use_bias=False)
        self.bn1 = layers.BatchNormalization()
        # Batch normalization is used to make GAN training more stable.
        self.conv2 = layers.Conv2D(width * 2, 4, 2, 'valid', use_bias=False)
        self.bn2 = layers.BatchNormalization()

        self.conv3 = layers.Conv2D(width * 4, 4, 2, 'valid', use_bias=False)
        self.bn3 = layers.BatchNormalization()

        # Stages 4-5: 3x3 kernels, stride 1, 'valid' padding, no bias.
        self.conv4 = layers.Conv2D(width * 8, 3, 1, 'valid', use_bias=False)
        self.bn4 = layers.BatchNormalization()

        self.conv5 = layers.Conv2D(width * 16, 3, 1, 'valid', use_bias=False)
        self.bn5 = layers.BatchNormalization()

        # Global average pooling over height and width.
        self.pool = layers.GlobalAveragePooling2D()
        # Flatten the pooled features.
        self.flatten = layers.Flatten()
        # Dense layer producing the single classification logit.
        self.fc = layers.Dense(1)

    def call(self, inputs, training=None):
        """Return the [b, 1] logits for a batch of images.

        Args:
            inputs: image batch; the shape notes below assume 64x64x3 inputs.
            training: forwarded to every batch-normalization layer.
        """
        # Built locally (not stored on self) so layer tracking is unchanged.
        stages = (
            (self.conv1, self.bn1),  # -> (b, 31, 31, 64)
            (self.conv2, self.bn2),  # -> (b, 14, 14, 128)
            (self.conv3, self.bn3),  # -> (b, 6, 6, 256)
            (self.conv4, self.bn4),  # -> (b, 4, 4, 512)
            (self.conv5, self.bn5),  # -> (b, 2, 2, 1024)
        )
        features = inputs
        for conv, norm in stages:
            features = tf.nn.leaky_relu(norm(conv(features), training=training))

        # (b, 2, 2, 1024) -> (b, 1024)
        features = self.pool(features)
        features = self.flatten(features)
        # [b, 1024] -> [b, 1]
        return self.fc(features)

### 训练

In [4]:
# Helper: tile a batch of images into one grid image and save it.
def save_result(val_out, val_block_size, image_path, color_mode):
    """Tile a batch of images into a grid and write it to ``image_path``.

    Images are laid out left to right, ``val_block_size`` per row, in batch
    order. If the batch does not fill the last row, the remaining cells are
    padded with black so that no image is dropped.

    Args:
        val_out: array-like of shape [n, h, w, c] with values in [-1, 1].
        val_block_size: number of images per grid row (must be positive).
        image_path: destination passed to ``Image.save``.
        color_mode: accepted for interface compatibility; not used.

    Raises:
        ValueError: if ``val_out`` is not a non-empty 4-D batch or
            ``val_block_size`` is not positive.
    """
    images = np.asarray(val_out)
    if images.ndim != 4 or images.shape[0] == 0:
        raise ValueError(
            "val_out must be a non-empty batch of shape [n, h, w, c], got shape %r"
            % (images.shape,)
        )
    if val_block_size <= 0:
        raise ValueError("val_block_size must be positive, got %r" % (val_block_size,))

    # Map [-1, 1] -> [0, 255]; clip first so out-of-range values saturate
    # instead of wrapping around in the uint8 cast.
    pixels = np.clip((images + 1.0) * 127.5, 0, 255).astype(np.uint8)

    count, height, width, channels = pixels.shape
    rows = -(-count // val_block_size)  # ceiling division
    missing = rows * val_block_size - count
    if missing:
        blank = np.zeros((missing, height, width, channels), dtype=np.uint8)
        pixels = np.concatenate((pixels, blank), axis=0)

    # [rows, cols, h, w, c] -> [rows, h, cols, w, c] -> [rows*h, cols*w, c]
    grid = pixels.reshape(rows, val_block_size, height, width, channels)
    grid = grid.transpose(0, 2, 1, 3, 4)
    grid = grid.reshape(rows * height, val_block_size * width, channels)

    # Single-channel images are saved as 2-D arrays.
    if channels == 1:
        grid = np.squeeze(grid, axis=2)
    Image.fromarray(grid).save(image_path)

In [5]:
def celoss_ones(logits):
    """
    Mean binary cross-entropy between `logits` and an all-ones target.

    `logits` are raw scores: the loss is computed with from_logits=True.
    """
    targets = tf.ones_like(logits)
    return tf.reduce_mean(
        losses.binary_crossentropy(targets, logits, from_logits=True)
    )

In [6]:
def celoss_zeros(logits):
    """
    Mean binary cross-entropy between `logits` and an all-zeros target.

    `logits` are raw scores: the loss is computed with from_logits=True.
    """
    targets = tf.zeros_like(logits)
    return tf.reduce_mean(
        losses.binary_crossentropy(targets, logits, from_logits=True)
    )

In [7]:
# Discriminator loss.
def d_loss_fn(generator, discriminator, batch_z, batch_x, is_training):
    """Return the discriminator loss for one batch.

    The discriminator should score real images as 1 and generated images
    as 0, so the loss is the sum of the cross-entropy of the real logits
    against ones and of the fake logits against zeros.

    Args:
        generator: callable mapping `batch_z` to generated images.
        discriminator: callable mapping images to logits.
        batch_z: latent vectors fed to the generator.
        batch_x: batch of real images.
        is_training: passed as the second positional argument to both models.
    """
    # Sample generated images, then score both generated and real batches.
    generated = generator(batch_z, is_training)
    fake_logits = discriminator(generated, is_training)
    real_logits = discriminator(batch_x, is_training)

    real_term = celoss_ones(real_logits)
    fake_term = celoss_zeros(fake_logits)

    # Combined loss.
    return fake_term + real_term

In [8]:
# Generator loss.
def g_loss_fn(generator, discriminator, batch_z, is_training):
    """Return the generator loss for one batch of latent vectors.

    The generator wants its images to be scored as real, so the loss is the
    cross-entropy of the discriminator's logits on generated images against
    a target of ones.
    """
    fake_logits = discriminator(generator(batch_z, is_training), is_training)
    return celoss_ones(fake_logits)

此处直接使用 `dataset` 模块中已编写好的 `make_anime_dataset` 函数构建数据集。

共21551张图片

In [9]:
# Fix the random seed so runs are reproducible.
tf.random.set_seed(22)
batch_size = 64
z_dim = 100
# Number of training-loop iterations (one real batch is drawn per iteration).
epochs = 1000000
# Adam learning rate. The previous value 2e3 (= 2000) is far too large for
# gradient-based training; 2e-4 is the usual DCGAN setting used with beta_1=0.5.
lr = 2e-4
is_training = True
# Number of discriminator updates per generator update.
k = 5
# Local dataset location.
# NOTE(review): machine-specific absolute path; adjust to where the images live.
img_path = glob.glob(r'E:\GAN_training_datasets\archive\data\*.png')
# print('images num:', len(img_path))
dataset, img_shape, _ = make_anime_dataset(img_path, batch_size, resize=64)
print(dataset, img_shape)
# Fetch the first batch as a sanity check.
sample = next(iter(dataset))
print(sample.shape)

# Repeat indefinitely: a finite repeat(100) yields far fewer batches than the
# `epochs` iterations requested, so next(db_iter) would raise StopIteration
# part-way through training.
dataset = dataset.repeat()
db_iter = iter(dataset)

<PrefetchDataset shapes: (64, 64, 64, 3), types: tf.float32> (64, 64, 3)
(64, 64, 64, 3)


In [None]:
# Generator
generator = Generator()
# Alternative kept for reference: generator.build(input_shape=(None, z_dim))
generator.build(input_shape=(4, z_dim))


discriminator = Discriminator()
# Alternative kept for reference: discriminator.build(input_shape=(None, 64, 64, 3))
discriminator.build(input_shape=(4, 64, 64, 3))

# Create a separate optimizer for each network.
g_optimizer = tf.optimizers.Adam(learning_rate=lr, beta_1=0.5)
d_optimizer = tf.optimizers.Adam(learning_rate=lr, beta_1=0.5)

# Each pass of this loop draws one latent batch and one real batch, so `epoch`
# counts training iterations rather than full passes over the data.
# next(db_iter) raises StopIteration if db_iter runs out before `epochs`
# iterations have completed.
for epoch in range(epochs):
    batch_z = tf.random.uniform([batch_size, z_dim], minval=-1., maxval=1.)
    batch_x = next(db_iter)

    # 1. Train the discriminator k times, then the generator once.
    # The discriminator loss pushes real images towards label 1 and generated
    # images towards label 0. The same batch_z / batch_x are reused for all k
    # discriminator updates.
    for _ in range(k):
        with tf.GradientTape() as tape:
            d_loss = d_loss_fn(generator, discriminator, batch_z, batch_x, is_training)
        grads = tape.gradient(d_loss, discriminator.trainable_variables)
        d_optimizer.apply_gradients(zip(grads, discriminator.trainable_variables))

    # 2. Train the generator: its loss is the cross-entropy of the
    # discriminator's output on generated images against label 1.
    with tf.GradientTape() as tape:
        g_loss = g_loss_fn(generator, discriminator, batch_z, is_training)
    grads = tape.gradient(g_loss, generator.trainable_variables)
    g_optimizer.apply_gradients(zip(grads, generator.trainable_variables))

    # Log every 100 iterations; d_loss is the value from the last of the k
    # discriminator steps.
    if epoch % 100 == 0:
        print(epoch, 'd_loss', float(d_loss), 'g-loss', float(g_loss))