# Auto-encoding images in latent space

## setup

### base modules

In [1]:
from pathlib import Path
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, regularizers
from tensorflow.keras.utils import image_dataset_from_directory

2023-09-06 17:12:01.042900: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-09-06 17:12:01.045480: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-06 17:12:01.137856: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-06 17:12:01.139621: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### build dataset

In [2]:
# Location of the image folder and the size (in pixels) images are resized to on load.
# NOTE(review): an earlier note said this folder contains 4_659 images, but the
# folder name and the loader output further down both indicate 100 — confirm.
IMAGES_PATH = "raw_data/photos/movies_100"
IMAGE_HEIGHT, IMAGE_WIDTH = 525, 350

In [5]:
def build_unsupervised_dataset(images_path=IMAGES_PATH,
                               width=IMAGE_WIDTH, height=IMAGE_HEIGHT,
                               batch_size=32):
    """Build an unlabeled image dataset shaped for auto-encoder training.

    Args:
        images_path: directory handed to `image_dataset_from_directory`.
        width: target image width in pixels.
        height: target image height in pixels.
        batch_size: number of images per batch.

    Returns:
        A dataset yielding `(images, images)` pairs: the pixel values are
        divided by 255 and the same tensor is used as both input and target.
        Files are shuffled with a fixed seed of 42.
    """
    def to_autoencoder_pair(images):
        # Scale once, then reuse the tensor as X and Y (the auto-encoder
        # learns to reproduce its own input).
        scaled = images / 255.0
        return scaled, scaled

    unlabeled_batches = image_dataset_from_directory(
        images_path,
        labels=None,
        label_mode=None,
        batch_size=batch_size,
        image_size=(height, width),  # Keras expects (height, width)
        shuffle=True,
        seed=42,
    )
    return unlabeled_batches.map(to_autoencoder_pair)

# Build the working dataset with batches of 8 images (the function default is 32).
dataset = build_unsupervised_dataset(batch_size=8)

Found 100 files belonging to 1 classes.


In [6]:
# Sanity check: pull one batch and show the shape of each element of the
# (input, target) pair — both should be identical.
i = iter(dataset)
e = next(i)
tuple(part.shape for part in e)

(TensorShape([8, 525, 350, 3]), TensorShape([8, 525, 350, 3]))

## models

### basic


In [7]:
def basic_encoder(embedding_dim=30):
    """Dense encoder: flatten the image, then 128 -> `embedding_dim` units.

    Args:
        embedding_dim: number of units in the final (embedding) layer.

    Returns:
        A `Sequential` model named "encoder" that takes
        (IMAGE_HEIGHT, IMAGE_WIDTH, 3) inputs.
    """
    encoder = models.Sequential(name="encoder")
    encoder.add(layers.Input(shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 3)))
    encoder.add(layers.Flatten())
    encoder.add(layers.Dense(128, activation="relu"))
    encoder.add(layers.Dense(embedding_dim, activation="relu"))
    return encoder
    
def basic_decoder(embedding_dim=30):
    """Dense decoder: `embedding_dim` -> 128 -> one sigmoid unit per pixel value.

    Args:
        embedding_dim: size of the embedding vector fed to the decoder.

    Returns:
        A `Sequential` model named "decoder" whose output is reshaped to
        (IMAGE_HEIGHT, IMAGE_WIDTH, 3).
    """
    image_shape = (IMAGE_HEIGHT, IMAGE_WIDTH, 3)
    decoder_layers = [
        layers.Input(shape=(embedding_dim,)),
        layers.Dense(128, activation="relu"),
        # One output unit per pixel channel; sigmoid keeps values in (0, 1).
        layers.Dense(IMAGE_HEIGHT * IMAGE_WIDTH * 3, activation="sigmoid"),
        layers.Reshape(image_shape),
    ]
    return models.Sequential(decoder_layers, name="decoder")


def basic_autoencoder(embedding_dim=30):
    """Chain `basic_encoder` and `basic_decoder` into a single model.

    Args:
        embedding_dim: size of the embedding shared by both halves.

    Returns:
        A `Sequential` model whose two layers are the encoder and the decoder.
    """
    encoder_part = basic_encoder(embedding_dim)
    decoder_part = basic_decoder(embedding_dim)
    return models.Sequential([encoder_part, decoder_part])

In [8]:
# Build the dense auto-encoder with a 3-dimensional embedding and keep a handle
# on its encoder half so embeddings can be computed on their own.
autoencoder = basic_autoencoder(embedding_dim=3)
encoder = autoencoder.get_layer(name="encoder")

In [9]:
# Train for a single epoch with Adam on the pixel-wise mean squared error
# between each image and its reconstruction.
autoencoder.compile(loss="mse", optimizer="adam")
autoencoder.fit(x=dataset, epochs=1)



<keras.src.callbacks.History at 0x7fbbe433d690>

* get the encodings of one batch of images

In [13]:
# Materialise ONE concrete batch of images.
# `dataset.take(1)` would only be a lazy view: the pipeline is shuffled and
# reshuffles on every iteration, so each `predict(...)` call would draw a
# different batch and the encodings would not match the images
# reconstructed from `batch` later on.
batch, _ = next(iter(dataset))  # dataset yields (input, target); keep the input

encodings = encoder.predict(batch)
encodings.shape



(8, 3)

* get images after processing by the autoencoder

In [12]:
# Reconstructions produced by the full encoder -> decoder pipeline for `batch`.
processed_imgs = autoencoder.predict(x=batch)
processed_imgs.shape




(8, 525, 350, 3)

### proper autoencoder

In [None]:
# Scratch lookup of the transposed-convolution layers Keras offers.
# NOTE(review): the original line was a truncated `layers.Tr`, which raises
# AttributeError on a fresh run; presumably it was an interrupted search for
# the `*Transpose` layers used below — confirm, or delete this cell.
[name for name in dir(layers) if "Transpose" in name]

In [None]:
def conv_encoder(embed_dim=30, image_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 3)):
    """Convolutional encoder: two stride-2 convolutions, then a dense embedding.

    Args:
        embed_dim: number of units in the final (embedding) layer.
        image_shape: (height, width, channels) of the input images.

    Returns:
        A `Sequential` model named "encoder" (same name as `basic_encoder`,
        so `autoencoder.get_layer('encoder')` works for either variant).
        The embedding layer has no activation.
    """
    return models.Sequential([
        layers.Input(shape=image_shape),
        # Each stride-2 convolution roughly halves the spatial resolution.
        layers.Conv2D(filters=32, kernel_size=3, strides=2, activation='relu'),
        layers.Conv2D(filters=64, kernel_size=3, strides=2, activation='relu'),
        layers.Flatten(),
        layers.Dense(embed_dim),
    ], name="encoder")
    
def conv_decoder(embed_dim=30, image_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 3)):
    """Convolutional decoder that reconstructs an image of `image_shape`.

    The embedding is projected onto a coarse feature map, upsampled 4x by two
    stride-2 transposed convolutions, and cropped to the exact target size.
    (The previous version reshaped to a fixed (4, 4, 2) map and therefore
    always produced 16x16x3 outputs, regardless of `image_shape`.)

    Args:
        embed_dim: size of the embedding vector fed to the decoder
            (defaults to 30, like `conv_encoder`).
        image_shape: (height, width, channels) of the images to reconstruct.

    Returns:
        A `Sequential` model named "decoder" whose output has shape
        `image_shape` with sigmoid-activated values.
    """
    height, width, channels = image_shape

    upsampling = 4      # two stride-2 transposed convolutions: 2 * 2
    base_filters = 32   # channels of the coarse map the embedding is projected to

    # Round the coarse grid UP so that, after upsampling, the feature map is at
    # least as large as the target; the surplus rows/columns are cropped below.
    base_height = (height + upsampling - 1) // upsampling
    base_width = (width + upsampling - 1) // upsampling
    extra_rows = base_height * upsampling - height
    extra_cols = base_width * upsampling - width

    return models.Sequential([
        layers.Input(shape=(embed_dim,)),
        layers.Dense(units=base_height * base_width * base_filters, activation='relu'),
        layers.Reshape(target_shape=(base_height, base_width, base_filters)),
        # With padding='same', each stride-2 transposed conv doubles height and width.
        layers.Conv2DTranspose(filters=64, kernel_size=3, strides=2, activation='relu', padding='same'),
        layers.Conv2DTranspose(filters=32, kernel_size=3, strides=2, activation='relu', padding='same'),
        layers.Conv2DTranspose(filters=channels, kernel_size=3, strides=1, activation='sigmoid', padding='same'),
        # Trim the bottom/right surplus so the output matches `image_shape` exactly.
        layers.Cropping2D(cropping=((0, extra_rows), (0, extra_cols))),
    ], name="decoder")

In [None]:
encoding_dim = 32  # size of the latent vector

# Number of floats in one image; with 32 latent floats the compression
# factor is n_pixel_values / encoding_dim.
n_pixel_values = IMAGE_HEIGHT * IMAGE_WIDTH * 3

# This is our input image — (height, width, channels), matching the dataset batches
input_img = layers.Input(shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 3))

# Dense layers act on the last axis only, so flatten the image first
flat_img = layers.Flatten()(input_img)

# "encoded" is the encoded representation of the input
encoded = layers.Dense(encoding_dim, activation='relu')(flat_img)
# "decoded" is the lossy reconstruction of the input, reshaped back into an image
decoded_flat = layers.Dense(n_pixel_values, activation='sigmoid')(encoded)
decoded = layers.Reshape((IMAGE_HEIGHT, IMAGE_WIDTH, 3))(decoded_flat)

# This model maps an input to its reconstruction
autoencoder = models.Model(input_img, decoded)
