In [1]:
import tensorflow
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, Dense, Input, Flatten,\
Conv2DTranspose, BatchNormalization, LeakyReLU, Reshape
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.datasets import mnist
import tensorflow.keras.backend as K
import tensorflow as tf

import numpy as np
import matplotlib.pyplot as plt
import plotly
import plotly.express as px

In [2]:
print(f"TensorFlow version: {tf.__version__}")

TensorFlow version: 2.7.0


In [3]:
# Check GPU availibility-
gpu_devices = tf.config.list_physical_devices('GPU')

if gpu_devices:
    # Get number of available GPUs-
    num_gpus = len(tf.config.list_physical_devices('GPU'))
    print(f"number of GPUs available = {num_gpus}")
    
    print(f"GPU: {gpu_devices}")
    details = tf.config.experimental.get_device_details(gpu_devices[0])
    print(f"GPU details: {details.get('device_name', 'Unknown GPU')}")
else:
    print("No GPU found")

number of GPUs available = 1
GPU: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
GPU details: Tesla K80


In [4]:
# input image dimensions
img_rows, img_cols = 28, 28

In [5]:
# Load MNIST dataset-
(X_train, y_train), (X_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [6]:
if tf.keras.backend.image_data_format() == 'channels_first':
    X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
    X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
    X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

In [7]:
print(f"\ninput_shape to be used: {input_shape}")


input_shape to be used: (28, 28, 1)


In [8]:
# Specify hyper-parameters-
batch_size = 64
num_classes = 10
num_epochs = 100

In [9]:
# Convert datasets to floating point types-
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

In [10]:
# By default the image data consists of integers between 0 and 255 for each pixel channel. Neural networks
# work best when each input is inside the range –1 to 1, so we need to divide by 255.

# Normalize the training and testing datasets-
X_train /= 255.0
X_test /= 255.0

In [None]:
# Convert class vectors/target to binary class matrices or one-hot encoded values-
# y_train = tf.keras.utils.to_categorical(y_train, num_classes)
# y_test = tf.keras.utils.to_categorical(y_test, num_classes)

In [11]:
print("\nDimensions of training and testing sets are:")
print(f"X_train.shape: {X_train.shape}, y_train.shape: {y_train.shape}")
print(f"X_test.shape: {X_test.shape}, y_test.shape: {y_test.shape}")


Dimensions of training and testing sets are:
X_train.shape: (60000, 28, 28, 1), y_train.shape: (60000,)
X_test.shape: (10000, 28, 28, 1), y_test.shape: (10000,)


In [12]:
# Create TF datasets-
train_dataset = tf.data.Dataset.from_tensor_slices(X_train).shuffle(60000).batch(128)
test_dataset = tf.data.Dataset.from_tensor_slices(X_test).shuffle(10000).batch(128)

In [13]:
class Encoder(Model):
    def __init__(self, latent_space = 3):
        super(Encoder, self).__init__()
            
        self.latent_space = latent_space
        
        self.conv1 = Conv2D(
            filters = 32, kernel_size = 3,
            strides = (2, 2), activation = None
        )
        
        self.conv2 = Conv2D(
            filters = 64, kernel_size = 3,
            strides = (2, 2), activation = None
        )
        
        self.flatten = Flatten()

        self.dense = Dense(units = self.latent_space, activation = None)
        
        # self.dense = Dense(units = self.latent_space + self.latent_space, activation = None)
        
        
    def call(self, x):
        x = tf.keras.activations.relu(self.conv1(x))
        x = tf.keras.activations.relu(self.conv2(x))
        x = self.flatten(x)
        # print(f"flattened shape: {x.shape}")
        # flattened shape: (None, 2304)
        x = tf.keras.activations.relu(self.dense(x))
        
        return x


In [14]:
class Decoder(Model):
    def __init__(self, latent_space = 3):
        super(Decoder, self).__init__()
            
        self.latent_space = latent_space
        
        self.dense = Dense(units = self.latent_space, activation = None)
        self.dense2 = Dense(units = 7 * 7 * 32, activation = None)
        self.reshape = Reshape(target_shape = (7, 7, 32))
        
        self.conv2d_tran = Conv2DTranspose(
            filters = 64, kernel_size = 3,
            strides = 2, padding = 'same',
            activation = None
        )
        
        self.conv2d_tran2 = Conv2DTranspose(
            filters = 32, kernel_size = 3,
            strides = 2, padding = 'same',
            activation = None
        )
        
        self.conv2d_output = Conv2DTranspose(
            filters = 1, kernel_size = 3,
            strides = 1, padding = 'same'
        )
       
    
    def call(self, x):
        x = tf.keras.activations.relu(self.dense(x))
        x = tf.keras.activations.relu(self.dense2(x))
        x = self.reshape(x)
        x = tf.keras.activations.relu(self.conv2d_tran(x))
        x = tf.keras.activations.relu(self.conv2d_tran2(x))
        # x = tf.keras.activations.sigmoid(self.conv2d_output(x))
        x = self.conv2d_output(x)
        return x
        

In [None]:
encoder = Encoder(latent_space = 3)

In [None]:
decoder = Decoder(latent_space = 3)

In [None]:
X = X_train[:6, :]

In [None]:
X_enc = encoder(X)

In [None]:
X.shape, X_enc.shape

((6, 28, 28, 1), TensorShape([6, 3]))

In [None]:
X_recon = decoder(X_enc)

In [None]:
X_recon.shape

TensorShape([6, 28, 28, 1])

In [None]:
del encoder, decoder, X, X_enc, X_recon

In [15]:
class VAE(Model):
    def __init__(self, latent_space = 3):
        super(VAE, self).__init__()
        
        self.latent_space = latent_space
        
        self.encoder = Encoder(latent_space = self.latent_space)
        self.decoder = Decoder(latent_space = self.latent_space)
        
        # Define fully-connected layers for computing mean & log variance-
        self.mu = Dense(units = self.latent_space, activation = None)
        self.log_var = Dense(units = self.latent_space, activation = None)


    def reparameterize(self, mean, logvar):
        # Sample from a multivariate Gaussian distribution.
        # Adds stochasticity or variation-
        eps = tf.random.normal(shape = mean.shape)
        return (eps * tf.exp(logvar * 0.5) + mean)
        
    
    def call(self, x):
        x = self.encoder(x)
        # print(f"x.shape: {x.shape}")
        # x.shape: (batch_size, 3)
        
        mu = self.mu(x)
        log_var = self.log_var(x)
        z = self.reparameterize(mu, log_var)
        # z = Sampling()([mu, log_var])
        '''
        print(f"mu.shape: {mu.shape}, log_var.shape: {log_var.shape}"
              f" & z.shape: {z.shape}")
        # mu.shape: (batch_size, 3), log_var.shape: (batch_size, 3) & z.shape: (batch_size, 3)
        '''
        x = tf.keras.activations.sigmoid(self.decoder(z))
        return x, mu, log_var
        

In [16]:
# Initialize VAE model-
model = VAE(latent_space = 3)

In [17]:
X = X_train[:4, :]

In [18]:
X_recon, mu, log_var = model(X)

In [19]:
X.shape, X_recon.shape

((4, 28, 28, 1), TensorShape([4, 28, 28, 1]))

In [20]:
mu.shape, log_var.shape

(TensorShape([4, 3]), TensorShape([4, 3]))

In [None]:
# del X, X_recon, mu, log_var

In [None]:
# eps = tf.random.normal(shape = mu.shape)
# z = (eps * tf.exp(log_var * 0.5) + mu)

In [21]:
# Get model summary-
model.summary()

Model: "vae"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder (Encoder)           multiple                  25731     
                                                                 
 decoder (Decoder)           multiple                  43533     
                                                                 
 dense_3 (Dense)             multiple                  12        
                                                                 
 dense_4 (Dense)             multiple                  12        
                                                                 
Total params: 69,288
Trainable params: 69,288
Non-trainable params: 0
_________________________________________________________________


In [22]:
# Count layer-wise number of trainable parameters-
tot_params = 0

for layer in model.trainable_weights:
    loc_params = tf.math.count_nonzero(layer, axis = None).numpy()
    tot_params += loc_params
    print(f"layer: {layer.shape} has {loc_params} parameters")

layer: (3, 3, 1, 32) has 288 parameters
layer: (32,) has 0 parameters
layer: (3, 3, 32, 64) has 18432 parameters
layer: (64,) has 0 parameters
layer: (2304, 3) has 6912 parameters
layer: (3,) has 0 parameters
layer: (3, 3) has 9 parameters
layer: (3,) has 0 parameters
layer: (3, 1568) has 4704 parameters
layer: (1568,) has 0 parameters
layer: (3, 3, 64, 32) has 18432 parameters
layer: (64,) has 0 parameters
layer: (3, 3, 32, 64) has 18432 parameters
layer: (32,) has 0 parameters
layer: (3, 3, 1, 32) has 288 parameters
layer: (1,) has 0 parameters
layer: (3, 3) has 9 parameters
layer: (3,) has 0 parameters
layer: (3, 3) has 9 parameters
layer: (3,) has 0 parameters


In [23]:
print(f"VAE has {tot_params} trainable parameters")

VAE has 67515 trainable parameters


In [24]:
# Define an optimizer-
optimizer = tf.keras.optimizers.Adam(learning_rate = 1e-4)

In [25]:
def compute_reconstruction_loss(data, reconstruction):
    # Reconstruction loss-
    reconstruction_loss = tf.reduce_mean(
        tf.reduce_sum(
            # tf.keras.losses.binary_crossentropy(data, reconstruction), axis = (1, 2)
            tf.keras.losses.mean_squared_error(data, reconstruction), axis = (1, 2)
        )
    )
    
    return reconstruction_loss


In [None]:
# recon_loss = compute_reconstruction_loss(data = X, reconstruction = X_recon)

In [None]:
# recon_loss, recon_loss.numpy()
# (<tf.Tensor: shape=(), dtype=float32, numpy=80.38895>, 80.38895)

In [26]:
def compute_kl_divergence_loss(mu, log_var):
    kl_loss = -0.5 * (1 + log_var - tf.square(mu) - tf.exp(log_var))
    kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis = 1))
    
    return kl_loss


In [None]:
# kl_loss = compute_kl_divergence_loss(mu = mu, log_var = log_var)

In [None]:
# kl_loss, kl_loss.numpy()
# (<tf.Tensor: shape=(), dtype=float32, numpy=0.0031785667>, 0.0031785667)

In [27]:
def compute_total_loss(data, reconstruction, mu, log_var, alpha = 1):
    recon_loss = compute_reconstruction_loss(data = data, reconstruction = reconstruction)
    kl_loss = compute_kl_divergence_loss(mu = mu, log_var = log_var)
    
    total_loss = (recon_loss * alpha) + kl_loss
    loss = total_loss**2
    
    # return total_loss
    return loss


In [None]:
'''
total_loss = compute_total_loss(
    data = X, reconstruction = X_recon,
    mu = mu, log_var = log_var,
    alpha = 1
)
'''

In [None]:
# total_loss, total_loss.numpy()
# (<tf.Tensor: shape=(), dtype=float32, numpy=80.39213>, 80.39213)

In [32]:
X_recon, mu, log_var = model(X)

In [33]:
X_recon.shape, X.shape

(TensorShape([4, 28, 28, 1]), (4, 28, 28, 1))

In [34]:
mu.shape, log_var.shape

(TensorShape([4, 3]), TensorShape([4, 3]))

In [35]:
with tf.GradientTape() as tape:
    loss = compute_total_loss(
    data = X, reconstruction = X_recon,
    mu = mu, log_var = log_var,
    alpha = 1
)

In [36]:
grads = tape.gradient(loss, model.trainable_weights)

In [37]:
type(grads), len(grads)

(list, 20)

In [38]:
for x in grads:
    print(x)

None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None


In [39]:
optimizer.apply_gradients(zip(grads, model.trainable_weights))

ValueError: ignored

In [None]:
with tf.GradientTape() as tape:
    recon_loss = compute_reconstruction_loss(data = X, reconstruction = X_recon)
    # kl_loss = compute_kl_divergence_loss(mu = mu, log_var = log_var)

In [None]:
grads = tape.gradient(recon_loss, model.trainable_weights)

In [None]:
type(grads), len(grads)

(list, 20)

In [None]:
for x in grads:
    print(x)

None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None


In [None]:
optimizer.apply_gradients(zip(grads, model.trainable_weights))

In [None]:
"""
with tf.GradientTape() as tape:
    total_loss = compute_total_loss(
        data = X, reconstruction = X_recon,
        mu = mu, log_var = log_var,
        alpha = 1
    )
"""

In [None]:
# total_loss
# <tf.Tensor: shape=(), dtype=float32, numpy=80.39213>

In [None]:
# grads = tape.gradient(total_loss, model.trainable_weights)

In [None]:
# type(grads), len(grads)
# (list, 16)

In [None]:
'''
for x in grads:
    print(x)
'''

'''
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
'''

In [None]:
# optimizer.apply_gradients(zip(grads, model.trainable_weights))
# ValueError: No gradients provided for any variable: 

In [None]:
@tf.function
def train_one_step(model, data, optimizer):
    data_recon, mu, log_var = model(data)

    with tf.GradientTape() as tape:
      total_loss = compute_total_loss(
          data = data, reconstruction = data_recon,
          mu = mu, log_var = log_var,
          alpha = 1
          )

    grads = tape.gradient(total_loss, model.trainable_weights)

    optimizer.apply_gradients(zip(grads, model.trainable_weights))

    return total_loss



In [None]:
'''
total_loss = train_one_step(
    model = model, data = X,
    optimizer = optimizer
)
'''
# ValueError: No gradients provided for any variable:

In [None]:
# total_loss
# <tf.Tensor: shape=(), dtype=float32, numpy=80.39213>

In [None]:
vae_total_loss = []

In [None]:
for epoch in range(1, 6):
  for data in train_dataset:
    total_loss = train_one_step(
        model = model, data = data,
        optimizer = optimizer
        )
    
    vae_total_loss.append(total_loss)


ValueError: ignored

In [None]:
data.shape

TensorShape([128, 28, 28, 1])