# VAE训练 - 基于 Faces 数据集
## 引入

In [1]:
import os

# TF_CPP_MIN_LOG_LEVEL has to be in the environment *before* TensorFlow is
# imported: the native runtime starts logging during import, so setting the
# variable afterwards is too late to filter those messages.
# "2" hides INFO and WARNING messages and keeps errors.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import numpy as np
from glob import glob
import tensorflow as tf

from models import VariationalAutoEncoder

## 环境参数

In [2]:
# Run parameters: identify this experiment and where its artifacts are stored.
SECTION = "vae"
RUN_ID = "0001"
DATA_NAME = "faces"

# Resulting layout: run/<SECTION>/<RUN_ID>_<DATA_NAME>
RUN_FOLDER = "run/{}/{}".format(SECTION, "_".join([RUN_ID, DATA_NAME]))

# Create each output sub-folder independently. exist_ok=True keeps the cell
# idempotent and also repairs a run folder that exists but is missing one of
# its sub-folders (e.g. after an interrupted or partially cleaned-up run).
for subfolder in ("viz", "images", "weights"):
    os.makedirs(os.path.join(RUN_FOLDER, subfolder), exist_ok=True)

# "build" saves a freshly constructed model; any other value ("load") makes
# the model cell restore previously saved weights instead.
MODE = "build" # "load"

# Root directory scanned for training images.
DATA_FOLDER = "./data/celeb"

## 加载数据

In [3]:
# Mini-batch size used by the data generator, and the image shape
# (height, width, channels) handed to the model.
BATCH_SIZE = 32
INPUT_DIM = (128, 128, 3)

# Count the JPEG files that sit exactly one directory level below DATA_FOLDER.
filenames = np.array(
    glob(os.path.join(DATA_FOLDER, "*/*.jpg"))
)
NUM_IMAGES = filenames.size
print(NUM_IMAGES)

202599


In [4]:
# Generator that multiplies every pixel value by 1/255 as images are loaded.
data_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0/255.0,
)

# Stream shuffled batches from DATA_FOLDER, resized to the model's spatial size.
# Per the Keras docs, class_mode="input" yields each image batch as its own
# target, which is the form an autoencoder trains on.
data_flow = data_gen.flow_from_directory(
    DATA_FOLDER,
    class_mode="input",
    subset="training",
    shuffle=True,
    batch_size=BATCH_SIZE,
    target_size=INPUT_DIM[:2],
)

Found 202599 images belonging to 1 classes.


## 模型架构

In [5]:
# Build the VAE: four stride-2 convolutions in the encoder, four stride-2
# transposed convolutions in the decoder, and a 200-dimensional latent space.
vae = VariationalAutoEncoder(
    input_dim=INPUT_DIM,
    z_dim=200,
    # encoder
    encoder_conv_filters=[32, 64, 64, 64],
    encoder_conv_kernel_size=[3, 3, 3, 3],
    encoder_conv_strides=[2, 2, 2, 2],
    # decoder
    decoder_conv_t_filters=[64, 64, 32, 3],
    decoder_conv_t_kernel_size=[3, 3, 3, 3],
    decoder_conv_t_strides=[2, 2, 2, 2],
    # regularisation
    use_batch_norm=True,
    use_dropout=True,
)

# Any mode other than "build" resumes from the weights stored in the run folder;
# "build" saves the newly constructed model there instead.
if MODE != "build":
    vae.load_weights(os.path.join(RUN_FOLDER, "weights/weights.h5"))
else:
    vae.save(RUN_FOLDER)

In [6]:
# Print the layer-by-layer summary (output shapes, parameter counts) of the encoder.
vae.encoder.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      [(None, 128, 128, 3) 0                                            
__________________________________________________________________________________________________
encoder_conv_0 (Conv2D)         (None, 64, 64, 32)   896         encoder_input[0][0]              
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 64, 64, 32)   128         encoder_conv_0[0][0]             
__________________________________________________________________________________________________
leaky_re_lu (LeakyReLU)         (None, 64, 64, 32)   0           batch_normalization[0][0]        
____________________________________________________________________________________________

In [7]:
# Print the layer-by-layer summary (output shapes, parameter counts) of the decoder.
vae.decoder.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
decoder_input (InputLayer)   [(None, 200)]             0         
_________________________________________________________________
dense (Dense)                (None, 4096)              823296    
_________________________________________________________________
reshape (Reshape)            (None, 8, 8, 64)          0         
_________________________________________________________________
decoder_conv_t_0 (Conv2DTran (None, 16, 16, 64)        36928     
_________________________________________________________________
batch_normalization_4 (Batch (None, 16, 16, 64)        256       
_________________________________________________________________
leaky_re_lu_4 (LeakyReLU)    (None, 16, 16, 64)        0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 16, 16, 64)        0   

## 训练

In [None]:
# Training hyperparameters.
LEARNING_RATE = 0.0005
# NOTE(review): presumably the weight of the reconstruction term in the VAE
# loss -- confirm against VariationalAutoEncoder.compile in models.
R_LOSS_FACTOR = 10000
EPOCHS = 200
PRINT_EVERY_N_BATCHES = 1000
INITIAL_EPOCH = 0  # raise this when resuming an earlier run

# Keras expects an integer number of steps; NUM_IMAGES / BATCH_SIZE is a float
# (e.g. 202599 / 32 = 6331.21875). Round up so the final, partially filled
# batch is still visited once per epoch.
STEPS_PER_EPOCH = int(np.ceil(NUM_IMAGES / BATCH_SIZE))

vae.compile(LEARNING_RATE, R_LOSS_FACTOR)

vae.train(
    data_flow,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    run_folder=RUN_FOLDER,
    print_every_n_batches=PRINT_EVERY_N_BATCHES,
    initial_epoch=INITIAL_EPOCH
)