## Model 

**Loading**

In [1]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten, Reshape
from tensorflow.math import exp, sqrt, square

**VAE**

In [2]:
class VAE(tf.keras.Model):
    """Fully-connected variational autoencoder (FC-VAE) for square, single-channel images.

    The encoder flattens an (N, 1, H, W) batch and applies three ReLU hidden
    layers; two linear heads map the encoding to the posterior mean and
    log-variance. The decoder mirrors the encoder and ends in a sigmoid layer
    whose output is reshaped back to (N, 1, H, W).
    """

    def __init__(self, input_size, latent_size=15):
        """
        Build the encoder, the two posterior heads, the decoder and the optimizer.

        Inputs:
        - input_size: Number of pixels per image (H*W); must be a perfect square.
        - latent_size: Dimension Z of the latent space.

        Raises:
        - ValueError: If input_size is not a perfect square.
        """
        super(VAE, self).__init__()
        self.input_size = input_size  # H*W, original (flattened) dimension
        self.latent_size = latent_size  # Z
        self.hidden_dim = 200  # H_d, width of every hidden layer

        # Derive the image side from input_size instead of hard-coding 28 so the
        # Flatten/Reshape layers always agree with the final Dense layer.
        side = int(round(input_size ** 0.5))
        if side * side != input_size:
            raise ValueError(
                f"input_size must be a perfect square (H == W), got {input_size}"
            )
        image_shape = (1, side, side)

        self.encoder = Sequential()
        self.mu_layer = Dense(self.latent_size)
        self.logvar_layer = Dense(self.latent_size)
        self.decoder = Sequential()
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

        # Encoder: the declared input shape includes the channel axis so that it
        # matches the (N, 1, H, W) batches passed to call().
        self.encoder.add(Flatten(input_shape=image_shape))
        for _ in range(3):
            self.encoder.add(Dense(units=self.hidden_dim, activation='relu'))

        # Decoder: three hidden layers, then per-pixel sigmoid outputs.
        for _ in range(3):
            self.decoder.add(Dense(units=self.hidden_dim, activation='relu'))
        self.decoder.add(Dense(units=self.input_size, activation='sigmoid'))
        self.decoder.add(Reshape(image_shape))

    def call(self, x):
        """
        Performs forward pass through FC-VAE model by passing image through
        encoder, reparametrize trick, and decoder models

        Inputs:
        - x: Batch of input images of shape (N, 1, H, W)

        Returns:
        - x_hat: Reconstructed input data of shape (N, 1, H, W)
        - mu: Matrix representing estimated posterior mu (N, Z), with Z latent space dimension
        - logvar: Matrix representing estimated variance in log-space (N, Z), with Z latent space dimension
        """
        encoded = self.encoder(x)
        mu = self.mu_layer(encoded)
        logvar = self.logvar_layer(encoded)
        # Sample z differentiably so gradients reach mu and logvar.
        z = reparametrize(mu, logvar)
        x_hat = self.decoder(z)

        return x_hat, mu, logvar


**CVAE**

In [3]:
class CVAE(tf.keras.Model):
    """Fully-connected conditional VAE (FC-CVAE).

    The one-hot class vector is concatenated to the flattened image before the
    encoder and to the latent sample before the decoder, so both networks are
    conditioned on the class label.
    """

    def __init__(self, input_size, num_classes=10, latent_size=15):
        """
        Build the encoder, the two posterior heads, the decoder and the optimizer.

        Inputs:
        - input_size: Number of pixels per image (H*W); must be a perfect square.
        - num_classes: Number of classes C encoded in the one-hot condition.
        - latent_size: Dimension Z of the latent space.

        Raises:
        - ValueError: If input_size is not a perfect square.
        """
        super(CVAE, self).__init__()
        self.input_size = input_size  # H*W
        self.latent_size = latent_size  # Z
        self.num_classes = num_classes  # C
        self.hidden_dim = 200  # H_d

        # Derive the image side from input_size instead of hard-coding 28.
        side = int(round(input_size ** 0.5))
        if side * side != input_size:
            raise ValueError(
                f"input_size must be a perfect square (H == W), got {input_size}"
            )

        self.encoder = Sequential()
        self.mu_layer = Dense(self.latent_size)
        self.logvar_layer = Dense(self.latent_size)
        self.decoder = Sequential()
        # The training loop applies gradients through model.optimizer, so the
        # conditional model needs its own optimizer just like VAE.
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

        # Encoder: receives the already flattened image concatenated with the
        # class vector, so no Flatten layer is needed here.
        for _ in range(3):
            self.encoder.add(Dense(units=self.hidden_dim, activation='relu'))

        # Decoder: receives the latent sample concatenated with the class vector.
        for _ in range(3):
            self.decoder.add(Dense(units=self.hidden_dim, activation='relu'))
        self.decoder.add(Dense(units=self.input_size, activation='sigmoid'))
        # Keep the channel axis so x_hat has the documented (N, 1, H, W) shape
        # and lines up element-wise with x in the reconstruction loss.
        self.decoder.add(Reshape((1, side, side)))

    def call(self, x, c):
        """
        Performs forward pass through FC-CVAE model by passing image through
        encoder, reparametrize trick, and decoder models

        Inputs:
        - x: Input data for this timestep of shape (N, 1, H, W)
        - c: One hot vector representing the input class (0-9) (N, C)

        Returns:
        - x_hat: Reconstructed input data of shape (N, 1, H, W)
        - mu: Matrix representing estimated posterior mu (N, Z), with Z latent space dimension
        - logvar: Matrix representing estimated variance in log-space (N, Z), with Z latent space dimension
        """
        # tf.concat requires both operands to share a dtype, so cast explicitly.
        condition = tf.cast(c, dtype="float32")
        flat_image = tf.reshape(tf.cast(x, dtype="float32"), [-1, self.input_size])

        encoded = self.encoder(tf.concat([flat_image, condition], 1))
        mu = self.mu_layer(encoded)
        logvar = self.logvar_layer(encoded)
        z = reparametrize(mu, logvar)
        x_hat = self.decoder(tf.concat([z, condition], 1))

        return x_hat, mu, logvar

**Tool Functions** (checked)

In [166]:
def reparametrize(mu, logvar):
    """
    Draw a differentiable sample from N(mu, exp(logvar)) via the reparameterization trick.

    Instead of sampling z directly, a standard-normal epsilon is sampled and
    z = mu + sigma * epsilon is returned, so gradients can flow from z back to
    mu and logvar. Taking the log-variance (rather than sigma itself) as input
    keeps training numerically more stable.

    Inputs:
    - mu: Tensor of shape (N, Z) giving means
    - logvar: Tensor of shape (N, Z) giving log-variances

    Returns:
    - z: Tensor shaped like mu, where z[i, j] is sampled from a Gaussian with
         mean mu[i, j] and log-variance logvar[i, j].
    """
    # sigma = exp(0.5 * log(sigma^2))
    sigma = tf.exp(0.5 * logvar)
    noise = tf.random.normal(shape=tf.shape(mu))
    return mu + sigma * noise

def bce_function(x_hat, x):
    """
    Computes the reconstruction loss of the VAE as a summed binary cross-entropy.

    Inputs:
    - x_hat: Reconstructed input data of shape (N, 1, H, W)
    - x: Input data for this timestep of shape (N, 1, H, W)

    Returns:
    - reconstruction_loss: Tensor containing the scalar loss for the reconstruction loss term.
    """
    summed_bce = tf.keras.losses.BinaryCrossentropy(
        reduction=tf.keras.losses.Reduction.SUM,
        from_logits=False,
    )
    # Keras averages the element-wise cross-entropy over the last axis before
    # applying the SUM reduction; scaling by that axis' size turns the result
    # into a sum over every element.
    last_axis_size = x.shape[-1]
    return summed_bce(x, x_hat) * last_axis_size


def loss_function(x_hat, x, mu, logvar):
    """
    Computes the negative variational lower bound loss term of the VAE (refer to formulation in notebook).
    Returned loss is the average loss per sample in the current batch.

    Inputs:
    - x_hat: Reconstructed input data of shape (N, 1, H, W)
    - x: Input data for this timestep of shape (N, 1, H, W)
    - mu: Matrix representing estimated posterior mu (N, Z), with Z latent space dimension
    - logvar: Matrix representing estimated variance in log-space (N, Z), with Z latent space dimension

    Returns:
    - loss: Tensor containing the scalar loss for the negative variational lowerbound
    """
    # bce_function returns the reconstruction loss summed over the whole batch;
    # divide by N so it is a per-sample average like the KL term below.
    reconstruction_sum = bce_function(x_hat, x)
    batch_size = tf.cast(tf.shape(x)[0], reconstruction_sum.dtype)
    reconstruction_loss = reconstruction_sum / batch_size

    # KL(q(z|x) || N(0, I)) in closed form, one value per sample.
    kl_per_sample = -0.5 * tf.reduce_sum(
        1 + logvar - tf.square(mu) - tf.exp(logvar), axis=-1
    )
    return reconstruction_loss + tf.reduce_mean(kl_per_sample)

## Assignment

**Loading**

In [5]:
import argparse
import math
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
import os
import random
import tensorflow as tf
from tensorflow.math import sigmoid
from tqdm import tqdm
from vae import VAE, CVAE, reparametrize, loss_function

**parseArguments**  


Defines and parses all command-line arguments.

In [6]:
def parseArguments():
    """
    Parse the command-line options of the training script.

    Returns:
    - args: argparse.Namespace with the boolean switches is_cvae and
    load_weights, plus batch_size, num_epochs, latent_size, input_size and
    learning_rate.
    """
    parser = argparse.ArgumentParser()

    # Boolean switches: False unless the flag is present.
    for switch in ("--is_cvae", "--load_weights"):
        parser.add_argument(switch, action="store_true")

    # (flag, value type, default) for every valued option.
    valued_options = (
        ("--batch_size", int, 128),
        ("--num_epochs", int, 10),
        ("--latent_size", int, 15),
        ("--input_size", int, 28*28),
        ("--learning_rate", float, 1e-3),
    )
    for flag, value_type, default_value in valued_options:
        parser.add_argument(flag, type=value_type, default=default_value)

    return parser.parse_args()

**one_hot**

In [7]:
def one_hot(labels, class_size):
    """
    Create one hot label matrix of size (N, C)

    Inputs:
    - labels: Labels Tensor of shape (N,) representing a ground-truth label
    for each MNIST image
    - class_size: Scalar giving the number of target classes in our dataset
    Returns:
    - targets: float32 one-hot label matrix of (N, C), where targets[i, j] = 1 when
    the ground truth label for image i is j, and targets[i, :j] &
    targets[i, j + 1:] are equal to 0
    """
    label_idx = np.asarray(labels)
    num_samples = label_idx.shape[0]
    targets = np.zeros((num_samples, class_size), dtype=np.float32)
    if num_samples:
        # One vectorised assignment instead of a Python-level loop over labels.
        # Reshaping to (N, -1) also accepts labels that carry extra trailing
        # axes, e.g. shape (N, 1).
        rows = np.arange(num_samples)[:, None]
        targets[rows, label_idx.reshape(num_samples, -1)] = 1
    return tf.convert_to_tensor(targets)

**train_vae**

In [8]:
def train_vae(model, train_loader, args, is_cvae=False):
    """
    Train your VAE with one epoch.

    Inputs:
    - model: Your VAE instance. It must expose an `optimizer` attribute.
    - train_loader: A tf.data.Dataset of MNIST dataset yielding (images, labels).
    - args: All arguments (currently unused inside this function).
    - is_cvae: A boolean flag for Conditional-VAE. If your model is a Conditional-VAE,
    set is_cvae=True. If it's a Vanilla-VAE, set is_cvae=False.

    Returns:
    - total_loss: Sum of loss values of all batches.
    """
    # Fall back to the 10 MNIST classes when the model does not declare its own.
    num_classes = getattr(model, "num_classes", 10)

    sum_loss = 0
    for batch in train_loader:
        x = batch[0]
        # Only the conditional model takes the one-hot class vector as input.
        model_inputs = (x, one_hot(batch[1], num_classes)) if is_cvae else (x,)
        with tf.GradientTape() as tape:
            # Invoke the model itself rather than .call() so Keras runs its
            # usual __call__ machinery around the forward pass.
            x_hat, mu, logvar = model(*model_inputs)
            loss = loss_function(x_hat, x, mu, logvar)
        grads = tape.gradient(loss, model.trainable_weights)
        model.optimizer.apply_gradients(zip(grads, model.trainable_weights))
        sum_loss += tf.reduce_sum(loss)

    return sum_loss


**Load Mnist**

In [9]:

def load_mnist(batch_size, buffer_size=1024):
    """
    Load the MNIST training split from tf.keras.datasets.mnist and wrap it in a
    shuffled, batched tf.data.Dataset.

    Inputs:
    - batch_size: An integer value of batch size.
    - buffer_size: Buffer size for random sampling in tf.data.Dataset.shuffle().

    Returns:
    - train_dataset: A tf.data.Dataset yielding (images, labels) batches, with
    images scaled by 1/255 and shaped (batch_size, 1, height, width). The last
    incomplete batch is dropped.
    """
    (images, labels), _unused_test_split = tf.keras.datasets.mnist.load_data()

    # Scale pixel values by 1/255, then insert a channel axis:
    # [batch_sz, channel_sz, height, width]
    images = (images / 255.0)[:, np.newaxis, :, :]

    pairs = tf.data.Dataset.from_tensor_slices((images, labels))
    shuffled = pairs.shuffle(buffer_size=buffer_size)
    return shuffled.batch(batch_size, drop_remainder=True)

**Save model weights**

In [10]:
def save_model_weights(model, args):
    """
    Save trained VAE model weights to model_ckpts/<vae|cvae>/<vae|cvae>.

    Inputs:
    - model: Trained VAE model.
    - args: All arguments; only args.is_cvae is read, to pick the sub-folder.
    """
    if args.is_cvae:
        model_flag = "cvae"
    else:
        model_flag = "vae"

    checkpoint_dir = os.path.join("model_ckpts", model_flag)
    # Create the root folder first, then the per-model folder.
    for directory in ("model_ckpts", checkpoint_dir):
        os.makedirs(directory, exist_ok=True)

    model.save_weights(os.path.join(checkpoint_dir, model_flag))


**show_vae_images**

In [11]:
def show_vae_images(model, latent_size):
    """
    Call this only if the model is VAE!
    Decode 10 latent vectors drawn from a standard normal distribution and
    plot the resulting images side by side.
    Image will be saved to outputs/show_vae_images.pdf

    Inputs:
    - model: Your trained model.
    - latent_size: Latent size of your model.
    """
    # Random latent codes -> decoded images.
    latent_codes = tf.random.normal(shape=[10, latent_size])
    generated = model.decoder(latent_codes).numpy()

    # One row of ten tightly spaced, axis-free panels.
    figure = plt.figure(figsize=(10, 1))
    layout = gridspec.GridSpec(1, 10)
    layout.update(wspace=0.05, hspace=0.05)
    for position, image in enumerate(generated):
        panel = figure.add_subplot(layout[position])
        panel.axis("off")
        panel.set_xticklabels([])
        panel.set_yticklabels([])
        panel.set_aspect("equal")
        panel.imshow(image.reshape(28, 28), cmap="Greys_r")

    # Write the figure to disk and release it.
    os.makedirs("outputs", exist_ok=True)
    figure.savefig(os.path.join("outputs", "show_vae_images.pdf"), bbox_inches="tight")
    plt.close(figure)

**show_vae_interpolation**

In [12]:
def show_vae_interpolation(model, latent_size):
    """
    Call this only if the model is VAE!
    Sample 12 pairs of random latent vectors, linearly interpolate each pair
    in 12 steps, decode every interpolated vector and plot the results as a
    12 x 12 grid (one pair per row).
    Image will be saved to outputs/show_vae_interpolation.pdf

    Inputs:
    - model: Your trained model.
    - latent_size: Latent size of your model.
    """
    def show_interpolation(images):
        """
        Plot a batch of square images on a square grid and save it as a PDF.
        """
        flat_images = tf.reshape(images, [images.shape[0], -1])  # (batch_size, D)
        grid_side = int(math.ceil(math.sqrt(flat_images.shape[0])))
        image_side = int(math.ceil(math.sqrt(flat_images.shape[1])))

        figure = plt.figure(figsize=(grid_side, grid_side))
        layout = gridspec.GridSpec(grid_side, grid_side)
        layout.update(wspace=0.05, hspace=0.05)
        for position, flat_image in enumerate(flat_images):
            panel = figure.add_subplot(layout[position])
            panel.axis('off')
            panel.set_xticklabels([])
            panel.set_yticklabels([])
            panel.set_aspect('equal')
            panel.imshow(tf.reshape(flat_image, [image_side, image_side]))

        # Write the figure to disk and release it.
        os.makedirs("outputs", exist_ok=True)
        figure.savefig(
            os.path.join("outputs", "show_vae_interpolation.pdf"),
            bbox_inches="tight",
        )
        plt.close(figure)

    steps = 12
    first = tf.random.normal(shape=[steps, latent_size], dtype=tf.dtypes.float32)  # [S, latent_size]
    second = tf.random.normal(shape=[steps, latent_size], dtype=tf.dtypes.float32)

    # Interpolation weights shaped [S, 1, 1] so they broadcast against the
    # [S, latent_size] endpoints into [S(weight), S(pair), latent_size].
    weights = tf.linspace(0, 1, steps)
    weights = tf.cast(tf.reshape(weights, (steps, 1, 1)), dtype=tf.float32)
    blended = weights * first + (1 - weights) * second

    # Put the pair axis first so each grid row shows one interpolation.
    blended = tf.transpose(blended, perm=[1, 0, 2])
    latent_batch = tf.reshape(blended, (steps*steps, latent_size))  # [S*S, latent_size]

    decoded = model.decoder(latent_batch)  # [S*S, 1, 28, 28]
    show_interpolation(decoded)

**show_cvae_images**

In [13]:
def show_cvae_images(model, latent_size):
    """
    Call this only if the model is CVAE!
    Conditionally generate images for each class (10 per digit for MNIST).
    Show the generated images from your trained CVAE.
    Image will be saved to outputs/show_cvae_images.pdf

    Inputs:
    - model: Your trained model.
    - latent_size: Latent size of your model.
    """
    # Conditionally generated images from vectors of random values.
    num_generation = 100
    # Use the model's own class count when it declares one (10 otherwise).
    num_classes = getattr(model, "num_classes", 10)
    num_per_class = num_generation // num_classes

    class_codes = tf.eye(num_classes)  # row i is the one-hot vector of class i
    decoder_inputs = []
    for label in range(num_classes):
        condition = tf.broadcast_to(class_codes[label], [num_per_class, num_classes])
        noise = tf.random.normal(shape=[num_per_class, latent_size])
        # The decoder expects [latent sample | one-hot class] per row.
        decoder_inputs.append(tf.concat([noise, condition], axis=-1))
    samples = model.decoder(tf.concat(decoder_inputs, axis=0)).numpy()

    # Visualize: one row per class, one column per generated sample.
    rows = num_classes
    cols = num_per_class

    fig = plt.figure(figsize=(cols, rows))
    gspec = gridspec.GridSpec(rows, cols)
    gspec.update(wspace=0.05, hspace=0.05)
    for i, sample in enumerate(samples):
        ax = plt.subplot(gspec[i])
        plt.axis("off")
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect("equal")
        plt.imshow(sample.reshape(28, 28), cmap="Greys_r")

    # Save the generated images
    os.makedirs("outputs", exist_ok=True)
    output_path = os.path.join("outputs", "show_cvae_images.pdf")
    plt.savefig(output_path, bbox_inches="tight")
    plt.close(fig)

**load_weights**

In [14]:
def load_weights(model, is_cvae=None):
    """
    Load the trained model's weights from model_ckpts/<vae|cvae>/<vae|cvae>.

    The model is first run once on a dummy input so that all of its layers
    create their variables before the checkpoint is restored.

    Inputs:
    - model: Your untrained model instance.
    - is_cvae: Optional boolean selecting the conditional checkpoint. When left
    as None it is inferred from the model type, so the function no longer
    depends on a module-level `args` variable.

    Returns:
    - model: Trained model.
    """
    if is_cvae is None:
        is_cvae = isinstance(model, CVAE)

    inputs = tf.zeros([1,1,28,28])  # Dummy all-zero image, only used to build the model
    if is_cvae:
        num_classes = getattr(model, "num_classes", 10)
        weights_path = os.path.join("model_ckpts", "cvae", "cvae")
        # A single label of class 0, shaped (N,) as one_hot documents.
        one_hot_vec = one_hot(tf.constant([0]), num_classes)
        _ = model(inputs, one_hot_vec)
    else:
        weights_path = os.path.join("model_ckpts", "vae", "vae")
        _ = model(inputs)
    model.load_weights(weights_path)
    return model

**main**

In [None]:
def main(args):
    """
    Run the full pipeline: load MNIST, build the (C)VAE, optionally restore
    saved weights, train for args.num_epochs epochs, save sample images and
    finally save the model weights.

    Inputs:
    - args: Parsed command-line arguments (see parseArguments).
    """
    # Load MNIST dataset
    train_dataset = load_mnist(args.batch_size)

    # Get an instance of VAE
    if args.is_cvae:
        model = CVAE(args.input_size, latent_size=args.latent_size)
    else:
        model = VAE(args.input_size, latent_size=args.latent_size)

    # Honour --learning_rate (its default equals the 1e-3 used by the models)
    # and make sure the model has the optimizer that train_vae relies on.
    model.optimizer = tf.keras.optimizers.Adam(learning_rate=args.learning_rate)

    # Load trained weights; previously the --load_weights flag was parsed but
    # silently ignored.
    if args.load_weights:
        model = load_weights(model)

    # Train VAE
    for epoch_id in range(args.num_epochs):
        total_loss = train_vae(model, train_dataset, args, is_cvae=args.is_cvae)
        print(f"Train Epoch: {epoch_id} \tLoss: {total_loss/len(train_dataset):.6f}")

    # Visualize results
    if args.is_cvae:
        show_cvae_images(model, args.latent_size)
    else:
        show_vae_images(model, args.latent_size)
        show_vae_interpolation(model, args.latent_size)

    # Optional: Save VAE/CVAE model for debugging/testing.
    save_model_weights(model, args)

# Script entry point: parse the command-line flags and run the pipeline.
# `args` is bound at module level here.
if __name__ == "__main__":
    args = parseArguments()
    main(args)

## =========================== Free try ====================================
## ======================================================================

## Data

In [114]:
# Grab a single (images, labels) batch for interactive experiments, using the
# native dataset iterator instead of the tf.compat.v1 one-shot iterator.
dataset = next(iter(load_mnist(128, buffer_size=1024)))

In [115]:
# Split the sample batch into its images (xx) and labels (cc).
xx, cc = dataset[0], dataset[1]

## Checking Area

In [276]:
class VAE(tf.keras.Model):
    """Fully-connected variational autoencoder (FC-VAE), revised with 400 hidden units.

    The encoder flattens an (N, 1, H, W) batch and applies three ReLU hidden
    layers; two linear heads produce the posterior mean and log-variance. The
    decoder declares its latent input shape up front, mirrors the encoder and
    ends in a sigmoid layer reshaped back to (N, 1, H, W).
    """

    def __init__(self, input_size, latent_size=15):
        """
        Build the encoder, the two posterior heads, the decoder and the optimizer.

        Inputs:
        - input_size: Number of pixels per image (H*W); must be a perfect square.
        - latent_size: Dimension Z of the latent space.

        Raises:
        - ValueError: If input_size is not a perfect square.
        """
        super(VAE, self).__init__()

        self.input_size = input_size  # H*W, original (flattened) dimension
        self.latent_size = latent_size  # Z
        self.hidden_dim = 400  # H_d

        # Derive the image side from input_size instead of hard-coding 28 so the
        # Flatten/Reshape layers always agree with the final Dense layer.
        side = int(round(input_size ** 0.5))
        if side * side != input_size:
            raise ValueError(
                f"input_size must be a perfect square (H == W), got {input_size}"
            )
        image_shape = (1, side, side)

        self.encoder = Sequential()
        self.decoder = Sequential()
        self.mu_layer = Dense(self.latent_size)
        self.logvar_layer = Dense(self.latent_size)
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

        # Encoder
        self.encoder.add(Flatten(input_shape=image_shape))
        for _ in range(3):
            self.encoder.add(Dense(units=self.hidden_dim, activation='relu'))

        # Decoder: pass the latent shape as an explicit tuple, which is the
        # documented form of the `shape` argument.
        self.decoder.add(tf.keras.layers.Input(shape=(self.latent_size,)))
        for _ in range(3):
            self.decoder.add(Dense(units=self.hidden_dim, activation='relu'))
        self.decoder.add(Dense(units=self.input_size, activation='sigmoid'))
        self.decoder.add(Reshape(image_shape))

    def call(self, x):
        """
        Performs forward pass through FC-VAE model by passing image through
        encoder, reparametrize trick, and decoder models

        Inputs:
        - x: Batch of input images of shape (N, 1, H, W)

        Returns:
        - x_hat: Reconstructed input data of shape (N, 1, H, W)
        - mu: Matrix representing estimated posterior mu (N, Z), with Z latent space dimension
        - logvar: Matrix representing estimated variance in log-space (N, Z), with Z latent space dimension
        """
        encoder_out = self.encoder(x)
        mu = self.mu_layer(encoder_out)
        logvar = self.logvar_layer(encoder_out)
        # Sample z differentiably so gradients reach mu and logvar.
        z = reparametrize(mu, logvar)
        x_hat = self.decoder(z)

        return x_hat, mu, logvar


In [277]:
def train_vae(model, train_loader, args, is_cvae=False):
    """
    Train your VAE with one epoch.

    Inputs:
    - model: Your VAE instance. It must expose an `optimizer` attribute.
    - train_loader: A tf.data.Dataset of MNIST dataset yielding (images, labels).
    - args: All arguments (currently unused inside this function).
    - is_cvae: A boolean flag for Conditional-VAE. If your model is a Conditional-VAE,
    set is_cvae=True. If it's a Vanilla-VAE, set is_cvae=False.

    Returns:
    - total_loss: Sum of loss values of all batches.
    """
    # Fall back to the 10 MNIST classes when the model does not declare its own.
    num_classes = getattr(model, "num_classes", 10)

    sum_loss = 0
    for batch in train_loader:
        x = batch[0]
        # Only the conditional model takes the one-hot class vector as input.
        model_inputs = (x, one_hot(batch[1], num_classes)) if is_cvae else (x,)
        with tf.GradientTape() as tape:
            # Invoke the model itself rather than .call() so Keras runs its
            # usual __call__ machinery around the forward pass.
            x_hat, mu, logvar = model(*model_inputs)
            loss = loss_function(x_hat, x, mu, logvar)
        grads = tape.gradient(loss, model.trainable_weights)
        model.optimizer.apply_gradients(zip(grads, model.trainable_weights))
        sum_loss += loss

    return sum_loss


In [279]:
def loss_function(x_hat, x, mu, logvar):
    """
    Negative variational lower bound of the VAE, averaged over the batch.

    Inputs:
    - x_hat: Reconstructed input data of shape (N, 1, H, W)
    - x: Input data for this timestep of shape (N, 1, H, W)
    - mu: Matrix representing estimated posterior mu (N, Z), with Z latent space dimension
    - logvar: Matrix representing estimated variance in log-space (N, Z), with Z latent space dimension

    Returns:
    - loss: Tensor containing the scalar loss for the negative variational lowerbound
    """
    # Reconstruction term: batch-summed BCE divided by the batch size N.
    batch_size = x.shape[0]
    reconstruction_term = bce_function(x_hat, x) / batch_size

    # KL term: closed-form divergence summed over latent dimensions, then
    # averaged over the batch.
    kl_inner = tf.reduce_sum(1 + logvar - square(mu) - exp(logvar), axis=-1)
    kl_term = -0.5 * tf.reduce_mean(kl_inner)

    return reconstruction_term + kl_term

## Input Area

## VA

In [280]:
VA = VAE(784)
# Load MNIST once and reuse it: the dataset can be iterated again for every
# epoch, so there is no need to rebuild it twice per iteration.
train_data = load_mnist(128, buffer_size=1024)
num_batches = len(train_data)
for i in range(10):
    print(f"{i+1}th iteration ===========================================================================")
    # train_vae does not read its `args` parameter, so None is passed.
    a = train_vae(VA, train_data, None, is_cvae=False)
    print(f"iteration {i+1}: average loss is {a/num_batches}")

iteration 1: average loss is 179.89028930664062
iteration 2: average loss is 131.95040893554688
iteration 3: average loss is 122.48843383789062
iteration 4: average loss is 117.58981323242188
iteration 5: average loss is 114.05528259277344
iteration 6: average loss is 111.8182601928711
iteration 7: average loss is 110.28640747070312
iteration 8: average loss is 109.09989166259766
iteration 9: average loss is 108.18840026855469
iteration 10: average loss is 107.3702163696289


In [None]:
# Run a single forward pass so x_hat, mu and logvar all come from the same
# latent sample (each separate call draws new random noise).
x_hat, mu, logvar = VA(xx)
loss_function(x_hat, xx, mu, logvar)

## CVA

In [40]:
CVA = CVAE(784)
# Load MNIST once and reuse it for every epoch instead of rebuilding it.
train_data = load_mnist(128, buffer_size=1024)
for i in range(10):
    print(f"{i+1}th iteration ===========================================================================")
    # train_vae does not read its `args` parameter, so None is passed.
    train_vae(CVA, train_data, None, is_cvae=True)

The loss is 69642.421875 c shape (128, 10)
The loss is 68826.734375 c shape (128, 10)
The loss is 67871.90625 c shape (128, 10)
The loss is 66320.21875 c shape (128, 10)
The loss is 63671.09375 c shape (128, 10)
The loss is 59998.08203125 c shape (128, 10)
The loss is 54021.578125 c shape (128, 10)
The loss is 47801.0390625 c shape (128, 10)
The loss is 42701.38671875 c shape (128, 10)
The loss is 41899.6796875 c shape (128, 10)
The loss is 42053.65234375 c shape (128, 10)
The loss is 36781.51171875 c shape (128, 10)
The loss is 33373.03515625 c shape (128, 10)
The loss is 30556.658203125 c shape (128, 10)
The loss is 30599.41015625 c shape (128, 10)
The loss is 30897.09375 c shape (128, 10)
The loss is 30025.287109375 c shape (128, 10)
The loss is 29365.80078125 c shape (128, 10)
The loss is 28809.822265625 c shape (128, 10)
The loss is 28110.572265625 c shape (128, 10)
The loss is 28969.12890625 c shape (128, 10)
The loss is 27842.0390625 c shape (128, 10)
The loss is 28756.154296875

The loss is 18290.548828125 c shape (128, 10)
The loss is 17296.015625 c shape (128, 10)
The loss is 18530.08203125 c shape (128, 10)
The loss is 18809.6875 c shape (128, 10)
The loss is 18287.359375 c shape (128, 10)
The loss is 18228.955078125 c shape (128, 10)
The loss is 17800.666015625 c shape (128, 10)
The loss is 17207.41015625 c shape (128, 10)
The loss is 18376.828125 c shape (128, 10)
The loss is 17734.37109375 c shape (128, 10)
The loss is 17540.408203125 c shape (128, 10)
The loss is 17070.23046875 c shape (128, 10)
The loss is 17256.30859375 c shape (128, 10)
The loss is 16878.953125 c shape (128, 10)
The loss is 17738.27734375 c shape (128, 10)
The loss is 17581.123046875 c shape (128, 10)
The loss is 17243.568359375 c shape (128, 10)
The loss is 17849.619140625 c shape (128, 10)
The loss is 17625.111328125 c shape (128, 10)
The loss is 17516.328125 c shape (128, 10)
The loss is 16517.068359375 c shape (128, 10)
The loss is 16954.998046875 c shape (128, 10)
The loss is 17

The loss is 14798.6298828125 c shape (128, 10)
The loss is 15790.7060546875 c shape (128, 10)
The loss is 14733.537109375 c shape (128, 10)
The loss is 15034.740234375 c shape (128, 10)
The loss is 14851.5732421875 c shape (128, 10)
The loss is 15013.318359375 c shape (128, 10)
The loss is 15260.6015625 c shape (128, 10)
The loss is 15211.1767578125 c shape (128, 10)
The loss is 14659.796875 c shape (128, 10)
The loss is 15612.7119140625 c shape (128, 10)
The loss is 15323.0947265625 c shape (128, 10)
The loss is 14851.296875 c shape (128, 10)
The loss is 15558.552734375 c shape (128, 10)
The loss is 14611.3359375 c shape (128, 10)
The loss is 15088.59765625 c shape (128, 10)
The loss is 15697.77734375 c shape (128, 10)
The loss is 14075.458984375 c shape (128, 10)
The loss is 14749.2724609375 c shape (128, 10)
The loss is 14176.142578125 c shape (128, 10)
The loss is 15139.734375 c shape (128, 10)
The loss is 15046.4599609375 c shape (128, 10)
The loss is 14910.3037109375 c shape (128

The loss is 13787.716796875 c shape (128, 10)
The loss is 12646.875 c shape (128, 10)
The loss is 12329.6328125 c shape (128, 10)
The loss is 13086.048828125 c shape (128, 10)
The loss is 12565.703125 c shape (128, 10)
The loss is 13027.333984375 c shape (128, 10)
The loss is 12636.751953125 c shape (128, 10)
The loss is 12172.294921875 c shape (128, 10)
The loss is 12141.576171875 c shape (128, 10)
The loss is 12821.28515625 c shape (128, 10)
The loss is 13014.337890625 c shape (128, 10)
The loss is 13319.04296875 c shape (128, 10)
The loss is 12461.2001953125 c shape (128, 10)
The loss is 12606.9375 c shape (128, 10)
The loss is 13210.025390625 c shape (128, 10)
The loss is 13314.54296875 c shape (128, 10)
The loss is 13419.19921875 c shape (128, 10)
The loss is 13066.5771484375 c shape (128, 10)
The loss is 13674.13671875 c shape (128, 10)
The loss is 13191.162109375 c shape (128, 10)
The loss is 13117.185546875 c shape (128, 10)
The loss is 12917.8349609375 c shape (128, 10)
The lo

KeyboardInterrupt: 

In [247]:
# NOTE: tf.round has no "decimals" parameter; its second positional argument is
# the op name. The value is therefore rounded to the nearest integer, which is
# why the recorded result is 1.0 rather than 1.2.
tf.round(1.22,1)

<tf.Tensor: shape=(), dtype=float32, numpy=1.0>

## Others
