Training notebook for:

Model 3: VAE without fusion

Model 4: VAE with fusion

In [None]:
# Make Google Drive visible inside the Colab VM; force_remount refreshes an existing mount.
from google.colab import drive
drive.mount(mountpoint='/content/gdrive', force_remount=True)

Mounted at /content/gdrive


Loading libraries:

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from skimage.io import imsave, imshow, imread
from skimage.color import rgb2lab, lab2rgb, grey2rgb, rgb2grey
from skimage.transform import resize
import os
from sklearn.model_selection import train_test_split
from keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input, decode_predictions

In [None]:
# ImageNet-pretrained classifier (classification head kept); its predictions are
# what the fusion-embedding loops further down save to disk.
inception = InceptionResNetV2(include_top=True, weights='imagenet')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5


Data loader:

In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` yielding batches of Lab-colorspace image data.

    Inputs are the L channel of each image (optionally paired with a fusion
    vector loaded from disk); targets are the a/b channels scaled by 1/128.
    Suitable for building a data generator for training and prediction.
    """
    def __init__(self, list_IDs, image_path,
                 to_fit=True, batch_size=32, dim=(256, 256), shuffle=True, fusion=False, fusion_path='fusion/',
                 fusion_dim=1000):
        """Initialization

        :param list_IDs: list of image ids (file names) to use in the generator
        :param image_path: path prefix of the images; it is concatenated directly
            with each id, so it must end with a path separator
        :param to_fit: True to return X and y, False to return X only
        :param batch_size: batch size at each iteration
        :param dim: (height, width) every image is resized to
        :param shuffle: True to shuffle the sample order after every epoch
        :param fusion: True to return (X, X_fusion) as input, False returns X
        :param fusion_path: path prefix of the saved fusion vectors (one file per id)
        :param fusion_dim: length of each fusion vector
        """
        # Newer Keras versions expect Sequence subclasses to call the base initializer.
        super().__init__()
        self.list_IDs = list_IDs
        self.image_path = image_path
        self.to_fit = to_fit
        self.batch_size = batch_size
        self.dim = dim
        self.shuffle = shuffle
        self.fusion = fusion
        self.fusion_path = fusion_path
        self.fusion_dim = fusion_dim
        self.on_epoch_end()

    def __len__(self):
        """Denotes the number of batches per epoch

        Only full batches are counted; a trailing partial batch is dropped.

        :return: number of batches per epoch
        """
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        """Generate one batch of data

        :param index: index of the batch
        :return: X and y when fitting. X only when predicting
        """
        # Positions (into list_IDs) belonging to this batch
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        # Translate positions into ids
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Generate data
        X = self._generate_X(list_IDs_temp)
        if self.to_fit:
            y = self._generate_y(list_IDs_temp)
            return X, y
        return X

    def on_epoch_end(self):
        """Rebuilds the sample order, reshuffling it when ``shuffle`` is set"""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def _generate_X(self, list_IDs_temp):
        """Generates the model inputs for one batch

        :param list_IDs_temp: list of image ids to load
        :return: array of L channels with shape (n, *dim, 1); when ``fusion`` is
            set, a tuple of that array and an (n, fusion_dim) array of fusion vectors
        """
        # Size the buffers by the ids actually loaded so that no uninitialized
        # rows can ever be returned.
        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, *self.dim, 1))
        X_fusion = np.empty((n_samples, self.fusion_dim)) if self.fusion else None

        for i, ID in enumerate(list_IDs_temp):
            X[i,] = self._load_lab_grayscale_image(self.image_path + ID)
            if self.fusion:
                X_fusion[i,] = self._load_fusion(self.fusion_path + ID)

        if self.fusion:
            return (X, X_fusion)
        return X

    def _generate_y(self, list_IDs_temp):
        """Generates the regression targets for one batch

        :param list_IDs_temp: list of image ids to load
        :return: array of scaled a/b channels with shape (n, *dim, 2)
        """
        y = np.empty((len(list_IDs_temp), *self.dim, 2))

        for i, ID in enumerate(list_IDs_temp):
            y[i,] = self._load_lab_color_image(self.image_path + ID)

        return y

    def _load_lab_image(self, image_path):
        """Reads an image, resizes it to ``dim`` and converts it to Lab

        :param image_path: full path of the image file
        :return: Lab image with shape (*dim, 3)
        """
        img = imread(image_path)
        img = img * (1.0 / 255)
        # Honor the configured size instead of a hard-coded 256x256.
        img = resize(img, self.dim)
        # Single-channel files come back 2-D; replicate to three channels for rgb2lab.
        if img.ndim == 2:
            img = grey2rgb(img)
        return rgb2lab(img)

    def _load_lab_grayscale_image(self, image_path):
        """Loads the model input for one image

        :param image_path: full path of the image file
        :return: L channel as produced by rgb2lab (not rescaled), shape (*dim, 1)
        """
        lightness = self._load_lab_image(image_path)[:, :, 0]
        return lightness.reshape(lightness.shape + (1,))

    def _load_fusion(self, image_path):
        """Loads a saved fusion vector

        :param image_path: full path of the ``np.save``-written file
        :return: the stored array
        """
        with open(image_path, 'rb') as f:
            embed = np.load(f)
            return embed

    def _load_lab_color_image(self, image_path):
        """Loads the regression target for one image

        :param image_path: full path of the image file
        :return: a/b channels multiplied by 1/128, shape (*dim, 2)
        """
        return self._load_lab_image(image_path)[:, :, 1:] * (1.0 / 128)

Sampling layer for VAE:

In [None]:
class Sampling(layers.Layer):
    """Reparameterization step: draws z from (z_mean, z_log_var) using unit Gaussian noise."""

    def call(self, inputs):
        mean, log_var = inputs
        mean_shape = tf.shape(mean)
        # One noise value per (sample, latent dimension).
        noise = tf.keras.backend.random_normal(shape=(mean_shape[0], mean_shape[1]))
        # exp(0.5 * log_var) is the standard deviation.
        std = tf.exp(0.5 * log_var)
        return mean + std * noise

Model 4 VAE with fusion:

In [None]:
# VAE with fusion: the sampled latent is concatenated with a 1000-dim fusion vector
# before decoding.
# NOTE: `encoder` / `decoder` (and the other names below) are module-level and are
# assigned again by the "VAE without fusion" cell; whichever cell ran last wins.
latent_dim = 256
initializer = tf.keras.initializers.Zeros()  # zero kernel for the z_log_var head

encoder_input = layers.Input(name="input", shape=(256, 256, 1))
embed_input = layers.Input(shape=(1000,))

# (filters, strides) of each 3x3 ReLU convolution, applied in order; the three
# stride-2 layers reduce 256x256 to 32x32.
_encoder_conv_specs = [(64, 2), (128, 1), (128, 2), (256, 1), (256, 2), (512, 1), (256, 1)]
encoder_output = encoder_input
for _filters, _strides in _encoder_conv_specs:
    encoder_output = layers.Conv2D(
        _filters, (3, 3), strides=_strides, padding='same', activation='relu'
    )(encoder_output)
encoder_output = layers.Flatten()(encoder_output)
encoder_output = layers.Dense(128, activation="relu")(encoder_output)

z_mean = layers.Dense(latent_dim, name="z_mean")(encoder_output)
z_log_var = layers.Dense(latent_dim, kernel_initializer=initializer, name="z_log_var")(encoder_output)
z = Sampling()([z_mean, z_log_var])
# Fusion: append the embedding to the sampled latent along the feature axis.
z = layers.Concatenate(axis=1)([z, embed_input])

encoder = keras.Model(
    inputs=[encoder_input, embed_input],
    outputs=[z_mean, z_log_var, z],
    name="encoder",
)

latent_inputs = keras.Input(shape=(latent_dim + 1000,))  # 1256-dim vector
decoder_output = layers.Dense(32 * 32 * 128, activation="relu")(latent_inputs)
decoder_output = layers.Reshape((32, 32, 128))(decoder_output)
# (filters, activation, upsample afterwards) for each 3x3 convolution; the three
# 2x upsamplings bring 32x32 back to 256x256 with 2 output channels.
_decoder_conv_specs = [(128, 'relu', True), (64, 'relu', True), (32, 'relu', False), (2, 'tanh', True)]
for _filters, _activation, _upsample in _decoder_conv_specs:
    decoder_output = layers.Conv2D(
        _filters, (3, 3), padding='same', activation=_activation
    )(decoder_output)
    if _upsample:
        decoder_output = layers.UpSampling2D((2, 2))(decoder_output)
decoder = keras.Model(inputs=latent_inputs, outputs=decoder_output, name="decoder")

class VAEFusion(keras.Model):
    """VAE trained with a custom step on (inputs, targets) batches.

    The loss is the per-pixel MSE summed over the image plus ``kl_weight`` times
    the KL divergence of the latent distribution. ``inputs`` is passed to the
    encoder unchanged; with the fusion encoder built in this notebook that is an
    (image batch, fusion batch) pair.
    """

    def __init__(self, encoder, decoder, kl_weight=0.01, **kwargs):
        """
        :param encoder: model returning (z_mean, z_log_var, z)
        :param decoder: model mapping z to the reconstruction
        :param kl_weight: factor applied to the KL term in the total loss
        :param kwargs: forwarded to ``keras.Model``
        """
        super(VAEFusion, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.kl_weight = kl_weight
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        # Listing the trackers here lets Keras reset them between epochs.
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def _compute_losses(self, inputs, targets):
        """Runs encoder and decoder and returns (total, reconstruction, kl) losses."""
        z_mean, z_log_var, z = self.encoder(inputs)
        reconstruction = self.decoder(z)

        # mse reduces the channel axis; sum over height/width, then average the batch.
        reconstruction_loss = tf.reduce_mean(
            tf.reduce_sum(
                keras.losses.mse(targets, reconstruction), axis=(1, 2)
            )
        )
        kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
        kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
        total_loss = reconstruction_loss + self.kl_weight * kl_loss
        return total_loss, reconstruction_loss, kl_loss

    def _update_trackers(self, total_loss, reconstruction_loss, kl_loss):
        """Feeds the running means and returns their current values as a log dict."""
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

    def train_step(self, data):
        """One optimization step on an (inputs, targets) batch."""
        inputs, targets = data
        with tf.GradientTape() as tape:
            total_loss, reconstruction_loss, kl_loss = self._compute_losses(inputs, targets)
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return self._update_trackers(total_loss, reconstruction_loss, kl_loss)

    def test_step(self, data):
        """Evaluates the same losses on an (inputs, targets) batch without updating weights."""
        inputs, targets = data
        total_loss, reconstruction_loss, kl_loss = self._compute_losses(inputs, targets)
        return self._update_trackers(total_loss, reconstruction_loss, kl_loss)

    def call(self, data):
        """Encodes ``data`` and returns the decoded reconstruction of the sampled latent."""
        z_mean, z_log_var, z = self.encoder(data)
        reconstruction = self.decoder(z)
        return reconstruction

Model 3 VAE without fusion:

In [None]:
# VAE without fusion: same convolutional encoder/decoder, latent fed to the decoder as-is.
# NOTE: this assigns the same module-level names (`encoder`, `decoder`, ...) as the
# "VAE with fusion" cell; whichever cell ran last determines what they refer to.
latent_dim = 256
initializer = tf.keras.initializers.Zeros()  # zero kernel for the z_log_var head

encoder_input = layers.Input(name="input", shape=(256, 256, 1))

# (filters, strides) of each 3x3 ReLU convolution, applied in order; the three
# stride-2 layers reduce 256x256 to 32x32.
_encoder_conv_specs = [(64, 2), (128, 1), (128, 2), (256, 1), (256, 2), (512, 1), (256, 1)]
encoder_output = encoder_input
for _filters, _strides in _encoder_conv_specs:
    encoder_output = layers.Conv2D(
        _filters, (3, 3), strides=_strides, padding='same', activation='relu'
    )(encoder_output)
encoder_output = layers.Flatten()(encoder_output)
encoder_output = layers.Dense(128, activation="relu")(encoder_output)

z_mean = layers.Dense(latent_dim, name="z_mean")(encoder_output)
z_log_var = layers.Dense(latent_dim, kernel_initializer=initializer, name="z_log_var")(encoder_output)
z = Sampling()([z_mean, z_log_var])

encoder = keras.Model(inputs=encoder_input, outputs=[z_mean, z_log_var, z], name="encoder")

latent_inputs = keras.Input(shape=(latent_dim,))
decoder_output = layers.Dense(32 * 32 * 128, activation="relu")(latent_inputs)
decoder_output = layers.Reshape((32, 32, 128))(decoder_output)
# (filters, activation, upsample afterwards) for each 3x3 convolution; the three
# 2x upsamplings bring 32x32 back to 256x256 with 2 output channels.
_decoder_conv_specs = [(128, 'relu', True), (64, 'relu', True), (32, 'relu', False), (2, 'tanh', True)]
for _filters, _activation, _upsample in _decoder_conv_specs:
    decoder_output = layers.Conv2D(
        _filters, (3, 3), padding='same', activation=_activation
    )(decoder_output)
    if _upsample:
        decoder_output = layers.UpSampling2D((2, 2))(decoder_output)
decoder = keras.Model(inputs=latent_inputs, outputs=decoder_output, name="decoder")

class VAE(keras.Model):
    """VAE trained with a custom step on (inputs, targets) batches.

    The loss is the per-pixel MSE summed over the image plus ``kl_weight`` times
    the KL divergence of the latent distribution.
    """

    def __init__(self, encoder, decoder, kl_weight=0.01, **kwargs):
        """
        :param encoder: model returning (z_mean, z_log_var, z)
        :param decoder: model mapping z to the reconstruction
        :param kl_weight: factor applied to the KL term in the total loss
        :param kwargs: forwarded to ``keras.Model``
        """
        super(VAE, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.kl_weight = kl_weight
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        # Listing the trackers here lets Keras reset them between epochs.
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def _compute_losses(self, inputs, targets):
        """Runs encoder and decoder and returns (total, reconstruction, kl) losses."""
        z_mean, z_log_var, z = self.encoder(inputs)
        reconstruction = self.decoder(z)

        # mse reduces the channel axis; sum over height/width, then average the batch.
        reconstruction_loss = tf.reduce_mean(
            tf.reduce_sum(
                keras.losses.mse(targets, reconstruction), axis=(1, 2)
            )
        )
        kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
        kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
        total_loss = reconstruction_loss + self.kl_weight * kl_loss
        return total_loss, reconstruction_loss, kl_loss

    def _update_trackers(self, total_loss, reconstruction_loss, kl_loss):
        """Feeds the running means and returns their current values as a log dict."""
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

    def train_step(self, data):
        """One optimization step on an (inputs, targets) batch."""
        inputs, targets = data
        with tf.GradientTape() as tape:
            total_loss, reconstruction_loss, kl_loss = self._compute_losses(inputs, targets)
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return self._update_trackers(total_loss, reconstruction_loss, kl_loss)

    def test_step(self, data):
        """Evaluates the same losses on an (inputs, targets) batch without updating weights."""
        inputs, targets = data
        total_loss, reconstruction_loss, kl_loss = self._compute_losses(inputs, targets)
        return self._update_trackers(total_loss, reconstruction_loss, kl_loss)

    def call(self, data):
        """Encodes ``data`` and returns the decoded reconstruction of the sampled latent."""
        z_mean, z_log_var, z = self.encoder(data)
        reconstruction = self.decoder(z)
        return reconstruction

Loading the train/validation image lists and computing Inception-ResNet-v2 representations (fusion inputs):

In [None]:
def _read_image_ids(list_path):
    """Returns the non-empty lines of ``list_path`` with line terminators removed.

    Stripping the terminator (rather than slicing off the last character) keeps
    the final id intact when the file does not end with a newline, and also
    handles Windows line endings.
    """
    with open(list_path) as f:
        stripped = (line.rstrip("\r\n") for line in f)
        return [image_id for image_id in stripped if image_id]


train_list = _read_image_ids("places_sample_train.txt")
val_list = _read_image_ids("places_sample_val.txt")

In [None]:
# Create the embedding cache directory; unlike a bare `mkdir`, this does not fail on re-run.
os.makedirs("fusion", exist_ok=True)

mkdir: cannot create directory ‘fusion’: File exists


In [None]:
# Save one InceptionResNetV2 prediction per validation image under ./fusion/<image id>.
# NOTE(review): preprocess_input is documented for pixel values in [0, 255], but
# it receives values scaled to [0, 1] here — confirm whether that is intended.
# Any change must be applied identically wherever embeddings are produced
# (training ids, validation ids, inference) and all saved files regenerated.
for image in val_list:
    img = imread('./gdrive/MyDrive/val_256/' + image)
    img = img * (1.0 / 255)
    # Drop the color information but keep three channels for the classifier.
    img = grey2rgb(rgb2grey(img))
    img = resize(img, (299, 299))
    img = preprocess_input(img)
    img = img.reshape((1,) + img.shape)  # add the batch axis
    # verbose=0: predict is called once per image and would otherwise print a
    # progress bar for every call.
    embed = inception.predict(img, verbose=0)
    with open('./fusion/' + image, 'wb') as f:
        np.save(f, embed)

In [None]:
# Save one InceptionResNetV2 prediction per training image under ./fusion/<image id>.
# The training ids are read from the same val_256 directory the training
# generator uses.
# NOTE(review): preprocess_input is documented for pixel values in [0, 255], but
# it receives values scaled to [0, 1] here — confirm whether that is intended.
# Any change must be applied identically wherever embeddings are produced
# (training ids, validation ids, inference) and all saved files regenerated.
for image in train_list:
    img = imread('./gdrive/MyDrive/val_256/' + image)
    img = img * (1.0 / 255)
    # Drop the color information but keep three channels for the classifier.
    img = grey2rgb(rgb2grey(img))
    img = resize(img, (299, 299))
    img = preprocess_input(img)
    img = img.reshape((1,) + img.shape)  # add the batch axis
    # verbose=0: predict is called once per image and would otherwise print a
    # progress bar for every call.
    embed = inception.predict(img, verbose=0)
    with open('./fusion/' + image, 'wb') as f:
        np.save(f, embed)

Training model 4:

In [None]:
# Generators for model 4: every batch carries the saved fusion vectors alongside the L channel.
image_path = "gdrive/MyDrive/val_256/"
_fusion_loader_options = dict(fusion=True, batch_size=100)
train_datagen = DataGenerator(train_list, image_path, **_fusion_loader_options)
val_datagen = DataGenerator(val_list, image_path, **_fusion_loader_options)

In [None]:
# `encoder` / `decoder` are module-level names that both model-definition cells
# assign. Fail early with a clear message if the single-input (no fusion) pair
# is the one currently bound, instead of erroring inside fit().
if len(encoder.inputs) != 2:
    raise RuntimeError(
        "VAEFusion needs the two-input (image + fusion vector) encoder; "
        "re-run the 'VAE with fusion' model cell before this one."
    )

# Stop after 7 epochs without val_loss improvement; keep only the best checkpoint.
es = keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience = 7)
mc = keras.callbacks.ModelCheckpoint('./gdrive/MyDrive/colorize_vae_fusion_10000_es.model', monitor='val_loss', mode='min', verbose=1, save_best_only=True)
vae = VAEFusion(encoder, decoder)
vae.compile(optimizer=keras.optimizers.Adam())
vae.fit(train_datagen, validation_data = val_datagen, epochs=100, callbacks = [es, mc])

Training model 3:

In [None]:
# Generators for model 3: batches contain the L channel only (no fusion vectors).
image_path = "gdrive/MyDrive/val_256/"
_plain_loader_options = dict(fusion=False, batch_size=100)
train_datagen = DataGenerator(train_list, image_path, **_plain_loader_options)
val_datagen = DataGenerator(val_list, image_path, **_plain_loader_options)

In [None]:
# `encoder` / `decoder` are module-level names that both model-definition cells
# assign. Fail early with a clear message if the two-input (fusion) pair is the
# one currently bound, instead of erroring inside fit().
if len(encoder.inputs) != 1:
    raise RuntimeError(
        "VAE needs the single-input encoder; "
        "re-run the 'VAE without fusion' model cell before this one."
    )

# Stop after 7 epochs without val_loss improvement; keep only the best checkpoint.
es = keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience = 7)
mc = keras.callbacks.ModelCheckpoint('other_files/colorize_vae_10000_es.model', monitor='val_loss', mode='min', verbose=1, save_best_only=True)
vae = VAE(encoder, decoder)
vae.compile(optimizer=keras.optimizers.Adam())
vae.fit(train_datagen, validation_data = val_datagen, epochs=100, callbacks=[es, mc])