In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow import keras
# datasets: built-in benchmark datasets; layers: base classes for network layers;
# Model: base class for networks; optimizers: optimizers; losses: loss functions
from tensorflow.keras import datasets, layers, optimizers, Model, losses

In [2]:
# Demo: tf.reduce_mean with axis=0 averages over the rows, giving one value per column.
# The input is an integer array, so the result is an integer tensor as well.
a = np.array([
    [-1, 3, 9],
    [3, 5, -3],
])
b = tf.reduce_mean(a, axis=0)
b

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([1, 4, 3])>

In [3]:
# Hyperparameters shared by the cells below
batchsz = 2 ** 9      # samples per batch (512)
shuffles = 10_000     # buffer size passed to Dataset.shuffle
lr = 0.001            # learning rate for the Adam optimizer
z_dim = 10            # dimensionality of the latent variable z
epochs = 20           # number of passes over the training set

In [4]:
# Tile several images into one large image and save it
def save_images(imgs, name):
    """Paste up to 100 images of 28x28 pixels into a 280x280 grid and save it.

    Parameters:
    imgs -- sized, indexable collection of 2-D arrays; each entry is converted
            with Image.fromarray(..., mode='L'), so it is assumed to be a
            uint8 array of shape (28, 28) -- TODO confirm against callers
    name -- output target handed to Image.save (typically a file path)

    Tiles are filled column by column: the outer loop walks the x offset and
    the inner loop the y offset. Only the first 100 entries of imgs are used.
    When fewer than 100 images are supplied, the remaining tiles keep the
    canvas's initial fill instead of raising IndexError. When name is a path
    with a directory component, that directory is created if missing.
    """
    new_im = Image.new('L', (280, 280))
    total = len(imgs)
    index = 0
    for i in range(0, 280, 28):
        for j in range(0, 280, 28):
            if index >= total:
                # Ran out of images: leave the rest of the grid untouched.
                break
            im = Image.fromarray(imgs[index], mode='L')
            new_im.paste(im, (i, j))
            index += 1

    # Image.save does not create directories; do it here so a long training
    # run does not fail at its first checkpoint image. Non-path targets
    # (e.g. file objects) are passed through unchanged.
    if isinstance(name, (str, os.PathLike)):
        out_dir = os.path.dirname(name)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
    new_im.save(name)

In [5]:
# Per-image preprocessing (image data as the example), suitable for fully connected layers
def preprocess(x):
    """
    Custom preprocessing function applied to a single image.

    Parameters:
    x -- one image; it is reshaped to 28*28 values, so it must hold exactly
         784 elements (e.g. shape [28, 28])

    Returns:
    x -- float32 tensor of shape [784] whose values were divided by 255
         (maps 8-bit pixel values into the range 0-1)
    """
    scaled = tf.cast(x, dtype=tf.float32) / 255.  # normalise to 0-1

    # Flatten to [28*28], not [-1, 28*28]: the latter would add a leading axis
    # to every element, which would later have to be squeezed away.
    # Alternative: skip flattening here and reshape the batch to
    # (-1, 28*28) when it is fed to the network.
    flat = tf.reshape(scaled, [28 * 28])
    return flat

In [6]:
# Load Fashion-MNIST; only the images are fed into the pipelines below
(x, y), (x_test, y_test) = datasets.fashion_mnist.load_data()
print('数据集fabshionmnist中x的维度', x.shape, '数据集fashionmnist中y的维度', y.shape)

# After loading, the arrays are wrapped in Dataset objects and transformed
# one step at a time (equivalent to db.step1().step2().step3()).
# Training pipeline: preprocess each image, shuffle, then batch.
train_db = tf.data.Dataset.from_tensor_slices(x)
train_db = train_db.map(preprocess)
train_db = train_db.shuffle(shuffles)  # argument is the shuffle buffer size
train_db = train_db.batch(batchsz)     # argument is the number of samples per batch

# Test pipeline: same preprocessing and batching, no shuffling.
test_db = tf.data.Dataset.from_tensor_slices(x_test)
test_db = test_db.map(preprocess)
test_db = test_db.batch(batchsz)

print(train_db, test_db)

数据集fabshionmnist中x的维度 (60000, 28, 28) 数据集fashionmnist中y的维度 (60000,)
<BatchDataset shapes: (None, 784), types: tf.float32> <BatchDataset shapes: (None, 784), types: tf.float32>


### VAE（变分自编码器）的实现

<img src="img/02.jpg"/>

首先通过编码器得到隐变量 z 的分布参数（均值与对数方差），然后利用重参数化技巧（Reparameterization Trick）从该分布中采样得到 z，使采样过程对网络参数可导。

In [7]:
class VAE(Model):
    """Fully connected variational autoencoder.

    The encoder maps an input to the mean and log-variance of a
    z_dim-dimensional Gaussian; the decoder maps a latent vector to 28*28
    output logits (no output activation is applied inside the model).
    """

    def __init__(self):
        super(VAE, self).__init__()
        # Encoder: one shared hidden layer followed by two linear heads
        self.fc1 = layers.Dense(128, activation='relu')
        self.fc2 = layers.Dense(z_dim)  # head producing the mean
        self.fc3 = layers.Dense(z_dim)  # head producing the log-variance

        # Decoder
        self.fc4 = layers.Dense(128, activation='relu')
        self.fc5 = layers.Dense(28*28)  # linear output, used as logits by callers

    def encoder(self, x):
        """Return (mean, log_var) of the latent distribution for input x."""
        h = self.fc1(x)
        mean = self.fc2(h)
        log_var = self.fc3(h)
        return mean, log_var

    def decoder(self, z):
        """Map a latent vector z to 28*28 reconstruction logits."""
        out = self.fc4(z)
        out = self.fc5(out)
        return out

    def reparameterization(self, mu, log_var):
        """Sample z = mu + std * eps with eps drawn from a standard normal.

        Parameters:
        mu      -- mean of the latent distribution
        log_var -- log-variance of the latent distribution, same shape as mu

        Returns:
        z -- a sample with the same shape as log_var
        """
        # Use the dynamic shape: the static log_var.shape contains None when
        # the batch size is unknown at trace time, which tf.random.normal
        # cannot accept. Matching the dtype keeps std * eps well-typed.
        eps = tf.random.normal(tf.shape(log_var), dtype=log_var.dtype)
        # The standard deviation is the square root of the variance
        std = tf.exp(0.5 * log_var)
        # Reparameterised sample
        z = mu + std * eps
        return z

    # Forward pass
    def call(self, inputs, training=None):
        """Encode inputs, sample z, and decode.

        Returns:
        (x_bar, mu, log_var) -- reconstruction logits plus the latent mean
        and log-variance. The training argument is accepted but not used.
        """
        mu, log_var = self.encoder(inputs)
        z = self.reparameterization(mu, log_var)
        # Reconstruction x_bar (logits)
        x_bar = self.decoder(z)
        return x_bar, mu, log_var

In [8]:
# Instantiate the VAE and create its weights for inputs with 28*28 = 784
# features, then print the per-layer parameter counts.
# NOTE(review): the leading 4 in input_shape looks like an arbitrary batch
# size used only for building -- confirm.
model = VAE()
model.build(input_shape=(4, 28 * 28))
model.summary()

Model: "vae"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                multiple                  100480    
_________________________________________________________________
dense_1 (Dense)              multiple                  1290      
_________________________________________________________________
dense_2 (Dense)              multiple                  1290      
_________________________________________________________________
dense_3 (Dense)              multiple                  1408      
_________________________________________________________________
dense_4 (Dense)              multiple                  101136    
Total params: 205,604
Trainable params: 205,604
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train the network
optimizer = optimizers.Adam(learning_rate=lr)
for epoch in range(epochs):
    for step, x in enumerate(train_db):
        with tf.GradientTape() as tape:
            logits, mu, log_var = model(x)

            # Alternative reconstruction loss (mean over all elements):
            # loss = losses.binary_crossentropy(x, logits, from_logits=True)
            # loss = tf.reduce_mean(loss)

            # Reconstruction loss: summed over all elements and averaged over
            # the batch axis only. This gives a larger loss value than a full
            # mean, which may make convergence faster.
            loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=x, logits=logits)
            loss = tf.reduce_sum(loss) / x.shape[0]

            # KL(p, q) with p ~ N(mu, var) and q ~ N(0, 1)
            kl_div = -0.5 * (log_var + 1 - mu**2 - tf.exp(log_var))
            kl_div = tf.reduce_sum(kl_div) / x.shape[0]

            # Named total_loss rather than `losses` so the imported
            # tensorflow.keras `losses` module is not overwritten.
            total_loss = loss + 1. * kl_div

        grads = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        if step % 100 == 0:
            print(epoch, step, 'kl:', float(kl_div), 'loss', float(loss))

    # Evaluation 1: decode latent vectors drawn from the standard normal prior
    z = tf.random.normal((batchsz, z_dim))
    z_logits = model.decoder(z)
    x_bar = tf.sigmoid(z_logits)  # logits -> pixel intensities in 0-1
    x_bar = tf.reshape(x_bar, [-1, 28, 28]).numpy() * 255.
    x_bar = x_bar.astype(np.uint8)

    save_images(x_bar, 'vae_img/sampled_epoch%d.png'%epoch)

    # Evaluation 2: reconstruct the first batch of the test set
    x = next(iter(test_db))
    x_bar_logits, _, _ = model(x)
    x_bar = tf.sigmoid(x_bar_logits)
    x_bar = tf.reshape(x_bar, [-1, 28, 28]).numpy() * 255.
    x_bar = x_bar.astype(np.uint8)

    save_images(x_bar, 'vae_img/rec_epoch%d.png'%epoch)