<a href="https://colab.research.google.com/github/gc2321/3546-Deep-Learning/blob/main/assign_4/assign_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

shapes3d: https://www.tensorflow.org/datasets/catalog/shapes3d

In [1]:
import os
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import random
random.seed(42)

In [3]:
import tensorflow_datasets as tfds

In [4]:
data = tfds.builder('shapes3d').info

In [5]:
data

tfds.core.DatasetInfo(
    name='shapes3d',
    full_name='shapes3d/2.0.0',
    description="""
    3dshapes is a dataset of 3D shapes procedurally generated from 6 ground truth
    independent latent factors. These factors are *floor colour*, *wall colour*,
    *object colour*, *scale*, *shape* and *orientation*.
    
    All possible combinations of these latents are present exactly once, generating
    N = 480000 total images.
    
    ### Latent factor values
    
    *   floor hue: 10 values linearly spaced in [0, 1]
    *   wall hue: 10 values linearly spaced in [0, 1]
    *   object hue: 10 values linearly spaced in [0, 1]
    *   scale: 8 values linearly spaced in [0, 1]
    *   shape: 4 values in [0, 1, 2, 3]
    *   orientation: 15 values linearly spaced in [-30, 30]
    
    We varied one latent at a time (starting from orientation, then shape, etc), and
    sequentially stored the images in fixed order in the `images` array. The
    corresponding values of the factors are s

In [6]:
total = data.splits['train'].num_examples
# take 30% of data
take_data = int(0.3 * total)
train_dataset = tfds.load('shapes3d', split=f'train[:{take_data}]')

Downloading and preparing dataset 255.18 MiB (download: 255.18 MiB, generated: Unknown size, total: 255.18 MiB) to /root/tensorflow_datasets/shapes3d/2.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/1 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/480000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/shapes3d/incomplete.G8F1CB_2.0.0/shapes3d-train.tfrecord*...:   0%|       …

Dataset shapes3d downloaded and prepared to /root/tensorflow_datasets/shapes3d/2.0.0. Subsequent calls will reuse this data.


In [7]:
take_data

144000

In [11]:
def rescale_image(features):
    features['image'] = tf.cast(features['image'], tf.float32) / 255.0
    return features

In [12]:
train_dataset = train_dataset.map(rescale_image)

## Variational Autoencoder

In [21]:
# https://www.tensorflow.org/tutorials/generative/cvae#network_architecture

class CVAE(tf.keras.Model):
    """Convolutional variational autoencoder."""

    def __init__(self, latent_dim):
        """
        latent_dim: int, typically much smaller than the original input dimension and represent the compressed, encoded version of the data.
        """
        super(CVAE, self).__init__()
        self.latent_dim = latent_dim
        self.encoder = tf.keras.Sequential(
            [
                # images are 64x64, color images
                tf.keras.layers.InputLayer(input_shape=(64, 64, 3)),
                # 4 layers
                tf.keras.layers.Conv2D(
                    filters=32, kernel_size=3, strides=(2, 2), activation="relu"
                ),
                tf.keras.layers.Conv2D(
                    filters=64, kernel_size=3, strides=(2, 2), activation="relu"
                ),
                tf.keras.layers.Conv2D(
                    filters=128, kernel_size=3, strides=(2, 2), activation="relu"
                ),
                tf.keras.layers.Conv2D(
                    filters=256, kernel_size=3, strides=(2, 2), activation="relu"
                ),
                tf.keras.layers.Flatten(),
                # No activation
                tf.keras.layers.Dense(latent_dim + latent_dim),
            ]
        )

        self.decoder = tf.keras.Sequential(
            [
                tf.keras.layers.InputLayer(input_shape=(latent_dim,)),
                tf.keras.layers.Dense(units=4 * 4 * 256, activation=tf.nn.relu),
                tf.keras.layers.Reshape(target_shape=(4, 4, 256)),
                tf.keras.layers.Conv2DTranspose(
                    filters=128,
                    kernel_size=3,
                    strides=2,
                    padding="same",
                    activation="relu",
                ),
                tf.keras.layers.Conv2DTranspose(
                    filters=64,
                    kernel_size=3,
                    strides=2,
                    padding="same",
                    activation="relu",
                ),
                tf.keras.layers.Conv2DTranspose(
                    filters=32,
                    kernel_size=3,
                    strides=2,
                    padding="same",
                    activation="relu",
                ),
                # No activation
                tf.keras.layers.Conv2DTranspose(
                    filters=3, kernel_size=3, strides=2, padding="same"
                ),
            ]
        )

        self.total_loss_tracker = tf.keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = tf.keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = tf.keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def encode(self, x):
        mean, logvar = tf.split(self.encoder(x), num_or_size_splits=2, axis=1)
        return mean, logvar

    def reparameterize(self, mean, logvar):
        eps = tf.random.normal(shape=mean.shape)
        return eps * tf.exp(logvar * 0.5) + mean

    def decode(self, z, apply_sigmoid=False):
        logits = self.decoder(z)
        if apply_sigmoid:
            probs = tf.sigmoid(logits)
            return probs
        return logits

    def train_step(self, data):
        with tf.GradientTape() as tape:
            mean, logvar = self.encode(data)
            z = self.reparameterize(mean, logvar)
            reconstruction = self.decode(z)
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    tf.keras.losses.binary_crossentropy(data, reconstruction),
                    axis=(1, 2, 3),
                )
            )
            kl_loss = -0.5 * tf.reduce_mean(
                tf.reduce_sum(1 + logvar - tf.square(mean) - tf.exp(logvar), axis=1)
            )
            total_loss = reconstruction_loss + kl_loss

        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)

        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }


## Train Model

In [14]:
batch = 32
train_dataset = train_dataset.batch(batch)

In [22]:
latent_dim = 2
cvae = CVAE(latent_dim)
cvae.compile(optimizer=tf.keras.optimizers.Adam())

In [23]:
cvae.fit(train_dataset, epochs=15)

Epoch 1/15


ValueError: in user code:

    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "<ipython-input-21-cb01dfcb2ab1>", line 97, in train_step
        mean, logvar = self.encode(data)
    File "<ipython-input-21-cb01dfcb2ab1>", line 81, in encode
        mean, logvar = tf.split(self.encoder(x), num_or_size_splits=2, axis=1)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/input_spec.py", line 197, in assert_input_compatibility
        raise ValueError(

    ValueError: Missing data for input "input_3". You passed a data dictionary with keys ['image', 'label_floor_hue', 'label_object_hue', 'label_orientation', 'label_scale', 'label_shape', 'label_wall_hue', 'value_floor_hue', 'value_object_hue', 'value_orientation', 'value_scale', 'value_shape', 'value_wall_hue']. Expected the following keys: ['input_3']
