In [None]:
# Install necessary packages if not already installed
# !pip install tensorflow numpy matplotlib

# Importing required libraries
import tensorflow as tf
from tensorflow.keras.layers import Dense, Reshape, Flatten, BatchNormalization, LeakyReLU, Conv2D, Conv2DTranspose
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Lambda
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os


Mounted at /content/drive


For our project, we used Google Colab. If you are also using Google Colab, mount Google Drive and change the working directory accordingly.

In [None]:

# Mount Google Drive at /content/drive. The google.colab module is supplied by
# the Colab runtime, which is why it is imported here rather than with the
# other imports.
from google.colab import drive
drive.mount('/content/drive')

# Change directory to where your images are stored.
# NOTE(review): the image-loading cell reads from an absolute folder_path, so
# this chdir looks redundant for the code visible here - confirm nothing else
# relies on relative paths before removing it.
os.chdir('/content/drive/My Drive/crops_dataset_trie/epsilon')

Load images, preprocess them (resize, normalize), and display an example.

In [None]:
# Load and preprocess images.
folder_path = '/content/drive/My Drive/crops_dataset_trie/epsilon'
# os.listdir() returns entries in arbitrary order; sorting makes the example
# image (index 0) and the order of the final array identical on every run.
file_list = sorted(os.listdir(folder_path))

# Browse every file in folder_path. Each file ending in .jpg or .png is loaded,
# converted from BGR to RGB, resized to 64x64 pixels and appended to the
# images list.
images = []
for file in file_list:
    if not file.endswith(('.jpg', '.png')):
        continue
    img_path = os.path.join(folder_path, file)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread() returns None (it does not raise) when a file cannot be
        # read or decoded; skip it instead of crashing in cvtColor below.
        print(f"Skipping unreadable image: {img_path}")
        continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (64, 64))
    images.append(img)

# Fail early with an explicit message instead of an IndexError on images[0].
if not images:
    raise ValueError(f"No readable .jpg/.png images found in {folder_path}")

# Display an example image: show the pre-processed image at index 0 to get an
# idea of the result of the pre-processing.
plt.figure(figsize=(8, 8))
plt.imshow(images[0])
plt.axis('off')
plt.title('Example of letter E')
plt.show()

# Convert the list of pre-processed images to a NumPy array and normalise the
# pixel values by dividing by 255.0, which brings them into the range [0, 1].
images = np.array(images).astype('float32') / 255.0


We now define the VAE model, including the encoder, the decoder, and the loss function.

In [None]:
# Dimensions: the inputs are 64x64 images with 3 colour channels (RGB) and the
# latent space has 200 dimensions. latent_dim can be tuned to the data (our
# sub-directories do not all have the same quantity or quality of images).
input_shape = (64, 64, 3)
latent_dim = 200

# Encoder: the image is flattened into a vector and passed through two fully
# connected (Dense) layers with ReLU activations to extract features.
# ReLU (Rectified Linear Unit) outputs max(0, x). Its use was suggested by an
# acquaintance of Emilie GUIDI, who wishes to remain anonymous but is studying
# computer science at the ENS in Lyon.
inputs = Input(shape=input_shape)
x = Flatten()(inputs)
for hidden_units in (512, 256):
    x = Dense(hidden_units, activation='relu')(x)

# Latent space parameters: two parallel Dense layers read the encoder features
# and produce z_mean and z_log_var, the mean and the logarithm of the variance
# of the latent distribution.
z_mean = Dense(latent_dim)(x)
z_log_var = Dense(latent_dim)(x)

# Sampling function: standard-normal noise is scaled by the standard deviation
# derived from z_log_var and shifted by z_mean, which yields a point z of the
# latent space.
def sampling(args):
    """Sample latent vectors from the distribution described by the encoder.

    Args:
        args: pair ``(z_mean, z_log_var)``; both are indexed as
            ``(batch, latent_dim)`` tensors.

    Returns:
        ``z_mean + exp(0.5 * z_log_var) * epsilon`` where ``epsilon`` is
        standard-normal noise of shape ``(batch, latent_dim)``.
    """
    mean, log_var = args
    noise_shape = (tf.shape(mean)[0], tf.shape(mean)[1])
    noise = tf.random.normal(shape=noise_shape)
    # exp(0.5 * log(variance)) is the standard deviation.
    std = tf.exp(0.5 * log_var)
    return mean + std * noise

# Latent sample: wrap the sampling function in a Lambda layer so it becomes
# part of the model graph.
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])

# Decoder: a latent vector goes through two ReLU Dense layers, then through a
# sigmoid output layer with one unit per pixel value (values between 0 and 1,
# matching the normalised images), and is finally reshaped to input_shape.
decoder_inputs = Input(shape=(latent_dim,))
x = decoder_inputs
for hidden_units in (256, 512):
    x = Dense(hidden_units, activation='relu')(x)
x = Dense(np.prod(input_shape), activation='sigmoid')(x)
outputs = Reshape(input_shape)(x)

# Models:
#   encoder - maps an input image to z_mean, its representation in latent space;
#   decoder - maps a latent vector back to an image;
#   vae     - full model: input image -> sampled z -> decoder output.
encoder = Model(inputs=inputs, outputs=z_mean)
decoder = Model(inputs=decoder_inputs, outputs=outputs)
vae_outputs = decoder(z)
vae = Model(inputs=inputs, outputs=vae_outputs)

# VAE loss function (written with help from ChatGPT). It measures two things:
# how accurately the VAE reconstructs the input data, and how far the latent
# distribution is from a standard normal distribution.
def vae_loss(inputs, outputs):
    """Return the VAE loss: reconstruction term plus KL term, averaged.

    Reads the module-level ``input_shape``, ``z_mean`` and ``z_log_var``.

    Args:
        inputs: original images.
        outputs: reconstructed images.

    Returns:
        Mean of (binary cross-entropy of the flattened tensors multiplied by
        the number of values per image) + (-0.5 * sum over the last axis of
        ``1 + z_log_var - z_mean**2 - exp(z_log_var)``).
    """
    pixel_bce = binary_crossentropy(K.flatten(inputs), K.flatten(outputs))
    # binary_crossentropy averages; rescale by the number of values per image.
    reconstruction_term = pixel_bce * np.prod(input_shape)
    kl_per_dim = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_term = K.sum(kl_per_dim, axis=-1) * -0.5
    return K.mean(reconstruction_term + kl_term)

# Adding VAE loss to the model. The loss is built from the model's own input
# and output tensors (vae_loss also reads z_mean and z_log_var), so it is
# registered with add_loss() instead of being passed to compile().
# NOTE(review): calling add_loss() on symbolic tensors is Keras 2 functional-API
# behaviour - confirm the installed TensorFlow/Keras version still supports it.
vae.add_loss(vae_loss(inputs, vae_outputs))

# Compile VAE model: prepares the model for training by specifying the
# optimizer that will update its weights and biases. Adam() is used with its
# default arguments; no loss is passed here because it was attached above.
vae.compile(optimizer=Adam())


Train the VAE model and generate images using the trained decoder!

In [None]:
# Training: the images are passed as both input and target. Training runs for
# 30 epochs (complete passes over the data set) in batches of 35 images. These
# hyperparameters can be modified to refine the results.
vae.fit(x=images, y=images, batch_size=35, epochs=30)

# Random latent vectors: draw 10 vectors of size latent_dim from a standard
# normal distribution, giving an array of shape
# (num_images_to_generate, latent_dim).
num_images_to_generate = 10
latent_vectors = np.random.normal(
    loc=0.0,
    scale=1.0,
    size=(num_images_to_generate, latent_dim),
)

# Generate images: the decoder turns each latent vector into an image.
generated_images = decoder.predict(latent_vectors)

# Display generated images in a two-row grid. Each image is the decoder output
# for one random latent vector; axes are hidden so only the images and their
# numbered titles are shown.
n_rows = 2
# Ceiling division: with an odd image count, `count // 2` columns would leave
# fewer grid cells than images and plt.subplot() would raise. At least one
# column is kept so the grid is always valid.
n_cols = max(1, -(-num_images_to_generate // n_rows))
fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 4), squeeze=False)
for i, ax in enumerate(axes.flat):
    if i < num_images_to_generate:
        ax.imshow(generated_images[i])
        ax.set_title(f"Image {i + 1}")
    # Also hides any unused cell at the end of the grid.
    ax.axis("off")
plt.show()
