# Introduction on the real BACMMAN dataset

In [6]:
# mount drive
from google.colab import drive
ROOT = '/content/drive'     # default for the drive
drive.mount(ROOT, force_remount=True)
os.chdir("/content/drive/My Drive/BACMMAN_DISTNET")

# install/load packages
!pip install git+https://github.com/jeanollion/dataset_iterator.git
# !pip install git+https://github.com/hugovaysset/uad.git
import tensorflow as tf
import h5py
import numpy as np
import matplotlib.pyplot as plt
import os
from uad.models.variational_autoencoder import VAE  # package not found when install from github
from tensorflow.keras import layers, Model

# copy data locallly
dataset_dir = f"{ROOT}/My Drive/BACMMAN_DISTNET/data/BACMMAN/"
!cp "/content/drive/My Drive/BACMMAN_DISTNET/data/BACMMAN/train_val_eval.h5" "/home/train_val_eval.h5"
dataset_path = "/home/train_val_eval.h5"
print(dataset_path)

!nvidia-smi

Mounted at /content/drive
Collecting git+https://github.com/jeanollion/dataset_iterator.git
  Cloning https://github.com/jeanollion/dataset_iterator.git to /tmp/pip-req-build-_g6jvukk
  Running command git clone -q https://github.com/jeanollion/dataset_iterator.git /tmp/pip-req-build-_g6jvukk
Building wheels for collected packages: dataset-iterator
  Building wheel for dataset-iterator (setup.py) ... [?25l[?25hdone
  Created wheel for dataset-iterator: filename=dataset_iterator-0.0.1-cp36-none-any.whl size=35945 sha256=dcd599b51664ce0c54a00df3a22a7ccd63afc3f00f0da773fb312c72de116447
  Stored in directory: /tmp/pip-ephem-wheel-cache-83rmhveh/wheels/e1/b1/4a/efb8888afaa53e058db8520c203e13af93ca10b2a8d42ab098
Successfully built dataset-iterator
/home/train_val_eval.h5
Tue Jul 28 16:45:53 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.51.05    Driver Version: 418.67       CUDA Version: 10.1     |
|-----------------------------

## Load datasets

In [36]:
from dataset_iterator import MultiChannelIterator, PreProcessingImageGenerator

# Intensity scaling: every image is divided by 2**16 - 1 (the 16-bit maximum).
# NOTE(review): this yields data in [0, 1] only if the raw images are unsigned
# 16-bit -- confirm against the dataset.
image_scaling = PreProcessingImageGenerator(lambda im: im / (2**16 - 1))


def make_raw_iterator(group_keyword, batch_size=1):
    """Build an iterator over the "/raw" channel of one group of the dataset.

    The single channel is used both as input and as output (autoencoder
    setting) and is passed through ``image_scaling``.

    :param group_keyword: keyword selecting the dataset group
        ("train", "val" or "eval" in this notebook)
    :param batch_size: number of images per batch
    :return: a ``MultiChannelIterator`` reading from ``dataset_path``
    """
    return MultiChannelIterator(dataset=dataset_path,
                                channel_keywords=["/raw"],
                                group_keyword=group_keyword,
                                input_channels=[0],
                                output_channels=[0],
                                image_data_generators=[image_scaling],
                                batch_size=batch_size)


x_train = make_raw_iterator("train")
x_val = make_raw_iterator("val")
x_test = make_raw_iterator("eval")

print(f"Number of batches {len(x_train)} of size {len(x_train[0][0])}")

Number of batches 9868 of size 1


In [20]:
# Sanity check: list every batch whose size differs from that of the first
# batch (prints "<batch index> <batch size>"). The reference size is read from
# the iterator instead of being hard-coded, and each batch is loaded only once.
expected_size = len(x_train[0][0])
for i in range(len(x_train)):
    size = len(x_train[i][0])
    if size != expected_size:
        print(i, size)

154 12


## Build model

In [8]:
class Sampling(layers.Layer):
    """Reparameterization layer: samples z from (z_mean, z_log_var).

    Computes ``z = z_mean + exp(0.5 * z_log_var) * epsilon`` where ``epsilon``
    is standard-normal noise with the same shape as ``z_mean``. The noise shape
    is taken directly from ``z_mean``, so the layer works for latent tensors of
    any rank (dense or convolutional).
    """

    def call(self, inputs):
        """
        :param inputs: pair ``(z_mean, z_log_var)`` of tensors with identical shape
        :return: sampled latent tensor, same shape as ``z_mean``
        """
        z_mean, z_log_var = inputs
        epsilon = tf.keras.backend.random_normal(shape=tf.shape(z_mean))
        # exp(0.5 * log_var) is the standard deviation
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon


def conv2d_block(input_tensor, n_filters, kernel_size=(3, 1), batchnorm=True, activation1="relu",
                 activation2="sigmoid"):
    """Stack two convolutional layers on top of ``input_tensor``.

    Each layer is Conv2D ('same' padding, he_normal init), optionally followed
    by BatchNormalization and by an activation. The second convolution is fed
    with the output of the first one (the previous version applied it to
    ``input_tensor`` again, which silently discarded the first layer).

    :param input_tensor: tensor the block is applied to
    :param n_filters: number of filters of both convolutions
    :param kernel_size: kernel size of both convolutions
    :param batchnorm: if True, add a BatchNormalization after each convolution
    :param activation1: name of the activation applied after the first
        convolution. If none, pass "" (empty string)
    :param activation2: name of the activation applied after the second
        convolution. If none, pass "" (empty string)
    :return: output tensor of the second layer
    """
    x = input_tensor
    for activation in (activation1, activation2):
        x = layers.Conv2D(filters=n_filters, kernel_size=kernel_size,
                          kernel_initializer='he_normal', padding='same')(x)
        if batchnorm:
            x = layers.BatchNormalization()(x)
        if activation:  # "" (or None) means: no activation layer
            x = layers.Activation(activation)(x)

    return x


In [9]:
# Hyper-parameters of the convolutional VAE
n_filters = 16                  # filters of the first encoder block (doubled at each level)
n_contractions = 5              # number of 2x2 poolings (encoder) / stride-2 upsamplings (decoder)
input_shape = (256, 32, 1)      # (height, width, channels) of the encoder input
latent_depth = n_filters * 2**n_contractions
# each contraction halves height and width
latent_dims = (input_shape[0] // 2**n_contractions, input_shape[1] // 2**n_contractions, latent_depth)
batchnorm, dropout = False, 0.2
k_size = (3, 1)

encoder_inputs = layers.Input(shape=input_shape, name="encoder_inputs")

# contracting path: [conv block -> 2x2 max-pooling -> dropout] x n_contractions
x = encoder_inputs
for i in range(n_contractions):
    x = conv2d_block(x, n_filters * 2**i, kernel_size=k_size, batchnorm=batchnorm)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Dropout(dropout)(x)

# 1x1 convolutions produce the parameters of the latent distribution
z_mean = layers.Conv2D(latent_depth, 1, strides=1, name="z_mean")(x)
z_log_var = layers.Conv2D(latent_depth, 1, strides=1, name="z_log_var")(x)
z = Sampling()((z_mean, z_log_var))

encoder = Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")

# Define decoder model.
latent_inputs = layers.Input(shape=latent_dims, name="z_sampling")

# expanding path: start explicitly from the latent input so the decoder never
# picks up a tensor left over from the encoder loop
x = latent_inputs
for i in range(n_contractions - 1, 0, -1):
    x = layers.Conv2DTranspose(n_filters * 2**i, k_size, strides=(2, 2),
                               padding='same')(x)
    x = layers.Dropout(dropout)(x)
    x = conv2d_block(x, n_filters * 2**i, kernel_size=k_size, batchnorm=batchnorm)

# Last upsampling back to the input resolution. The width is written out as
# n_filters * 2, which is what the previous version obtained by reusing the
# loop variable (left at 1 after the loop).
x = layers.Conv2DTranspose(n_filters * 2, kernel_size=k_size, strides=(2, 2),
                           padding='same')(x)
x = layers.Dropout(dropout)(x)
# single-channel reconstruction, no activation
x = layers.Conv2D(1, kernel_size=k_size, padding="same")(x)


decoder = Model(inputs=latent_inputs, outputs=x, name="decoder")


In [14]:
class VAE(Model):
    """
    Variational autoencoder without predefined architecture. Build the encoder and decoder
    using the keras functional API and pass them as arguments to the class to instantiate
    a custom VAE model.

    The loss is computed inside ``train_step`` / ``test_step`` (no loss has to be given to
    ``compile``): total loss = reconstruction loss + KL loss, all reduced to scalars.
    """

    def __init__(self, encoder, decoder, dims=(28, 28, 1), reconstruction_loss="mse", **kwargs):
        """
        :param encoder: model mapping an input batch to ``(z_mean, z_log_var, z)``
        :param decoder: model mapping ``z`` to a reconstruction of the input
        :param dims: dimensions of one input image; ``dims[0] * dims[1]`` scales the
        "xent" reconstruction loss
        :param reconstruction_loss: name of the reconstruction loss to use (can be "xent" for MNIST or "mse" for real
        images)
        """
        super(VAE, self).__init__(**kwargs)
        self.dims = dims
        self.encoder = encoder
        self.decoder = decoder
        self.reconstruction_loss = reconstruction_loss

    def _compute_losses(self, data, training):
        """Run encoder + decoder on ``data`` and return the dict of scalar losses.

        :param data: batch of input images
        :param training: forwarded to encoder and decoder (controls e.g. Dropout)
        :return: dict with keys "loss", "reconstruction_loss" and "kl_loss"
        :raises NotImplementedError: if ``self.reconstruction_loss`` is neither "xent" nor "mse"
        """
        z_mean, z_log_var, z = self.encoder(data, training=training)
        reconstruction = self.decoder(z, training=training)

        if self.reconstruction_loss == "xent":
            reconstruction_loss = tf.reduce_mean(
                tf.keras.losses.binary_crossentropy(data, reconstruction)
            )
            reconstruction_loss *= self.dims[0] * self.dims[1]
        elif self.reconstruction_loss == "mse":
            # MSE() only averages over the last axis; reduce to a scalar so that the
            # reported losses are scalars. The relative weight of reconstruction and
            # KL terms in the gradient is the same as before (only a global factor changes).
            reconstruction_loss = tf.reduce_mean(
                tf.keras.losses.MSE(data, reconstruction)
            )
        else:
            raise NotImplementedError("Reconstruction loss should be either xent or mse")

        kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
        kl_loss = -0.5 * tf.reduce_mean(kl_loss)

        return {
            "loss": reconstruction_loss + kl_loss,
            "reconstruction_loss": reconstruction_loss,
            "kl_loss": kl_loss,
        }

    def train_step(self, data):
        """One optimization step on a batch; returns the dict of losses."""
        if isinstance(data, tuple):
            # (inputs, targets) batches: only the inputs are used
            data = data[0]

        with tf.GradientTape() as tape:
            # training=True so that Dropout / BatchNormalization run in training mode
            losses = self._compute_losses(data, training=True)

        grads = tape.gradient(losses["loss"], self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))

        return losses

    def test_step(self, data):
        """Evaluate the losses on a batch (inference mode, no gradient tape needed)."""
        if isinstance(data, tuple):
            data = data[0]

        return self._compute_losses(data, training=False)

    def call(self, inputs, training=None):
        """Return the reconstruction of ``inputs`` (encode, sample z, decode)."""
        z_mean, z_log_var, z = self.encoder(inputs, training=training)
        return self.decoder(z, training=training)

# Wrap encoder and decoder into the VAE model; the loss is computed inside the
# model's train_step, so compile() only receives the optimizer (default Adam).
vae = VAE(
    encoder,
    decoder,
    dims=(256, 32, 1),
    reconstruction_loss="mse",
)
vae.compile(optimizer=tf.keras.optimizers.Adam())

In [38]:
epochs = 10
batch_size = 64  # NOTE: not forwarded to the iterators, which are built with their own batch_size

# len(x_train) is already a number of batches, so it is used directly as the
# number of steps per epoch (dividing it by batch_size again made each epoch
# cover only a fraction of the training set). Model.fit accepts generators
# directly; fit_generator is deprecated.
history = vae.fit(x_train, validation_data=x_val,
                  steps_per_epoch=len(x_train), validation_steps=len(x_val),
                  epochs=epochs)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
