In [None]:
import zipfile
from io import BytesIO

import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import ops
from PIL import Image

In [None]:
#Step one: load the data. Only the Monet paintings are read here; they are the "real" images shown to the discriminator.
#Every image is resized to 64x64 so that the models train faster and need fewer resources.
monet_data = tf.keras.utils.image_dataset_from_directory(
    "/kaggle/input/gan-getting-started/monet_jpg",
    label_mode=None,
    image_size=(64, 64),
    seed=42,
)

In [None]:
#Normalize the pixel values by dividing by 255. There are 300 Monet paintings and 7038 photos in the Kaggle dataset
def _scale_to_unit(batch):
    """Divide a batch of images by 255."""
    return batch / 255.0


monet_norm_data = monet_data.map(_scale_to_unit)

In [None]:
#Show one of the Monet paintings to get a sense of what the GAN is aiming for
monet_batches = list(monet_norm_data.as_numpy_iterator())
plt.imshow(monet_batches[5][5])

In [None]:
#A second example painting: image 8 of batch 8
monet_batches = list(monet_norm_data.as_numpy_iterator())
plt.imshow(monet_batches[8][8])

In [None]:
#Inspect the shape and dtype of the elements produced by the normalized dataset
monet_norm_data.element_spec

Each image in the dataset is 64x64 pixels with 3 color channels. With the data normalized, the next step is to create the model.

In [None]:
#For the model I will utilize DCGANs with different parameters
#I started with the base model shown in the keras guide to serve as a control
base_generator = keras.Sequential([
    # 128-dimensional latent vector in
    keras.layers.InputLayer(shape=(128,)),
    # Project and reshape the latent vector into an 8x8 feature map with 128 channels
    keras.layers.Dense(8*8*128),
    keras.layers.Reshape((8,8,128)),
    # Three stride-2 transposed convolutions upsample 8x8 -> 16x16 -> 32x32 -> 64x64
    keras.layers.Conv2DTranspose(128, kernel_size=4, strides=2, padding="same"),
    keras.layers.LeakyReLU(negative_slope=0.2),
    keras.layers.Conv2DTranspose(256, kernel_size=4, strides=2, padding="same"),
    keras.layers.LeakyReLU(negative_slope=0.2),
    keras.layers.Conv2DTranspose(512, kernel_size=4, strides=2, padding="same"),
    keras.layers.LeakyReLU(negative_slope=0.2),
    # Sigmoid keeps the 3 output channels in [0, 1], matching the normalized training images
    keras.layers.Conv2D(3, kernel_size=5, padding="same", activation="sigmoid"),
], name="control_generator")
#Print the architecture, as is done for every other model in this notebook
base_generator.summary()



In [None]:
#First variant: same layout as the control generator, but the transposed convolutions use a smaller 2x2 kernel
kernel_layers = [
    keras.layers.InputLayer(shape=(128,)),
    keras.layers.Dense(8*8*128),
    keras.layers.Reshape((8,8,128)),
]
# Three stride-2 upsampling stages: 8x8 -> 16x16 -> 32x32 -> 64x64
for n_filters in (128, 256, 512):
    kernel_layers.append(
        keras.layers.Conv2DTranspose(n_filters, kernel_size=2, strides=2, padding="same")
    )
    kernel_layers.append(keras.layers.LeakyReLU(negative_slope=0.2))
# Final convolution maps to 3 channels squashed into [0, 1]
kernel_layers.append(
    keras.layers.Conv2D(3, kernel_size=5, padding="same", activation="sigmoid")
)
kernel_generator = keras.Sequential(kernel_layers, name="kernel_generator")
kernel_generator.summary()

In [None]:
#Second variant: keep the 4x4 kernels but raise the LeakyReLU negative slope from 0.2 to 0.5
relu_layers = [
    keras.layers.InputLayer(shape=(128,)),
    keras.layers.Dense(8*8*128),
    keras.layers.Reshape((8,8,128)),
]
# Three stride-2 upsampling stages: 8x8 -> 16x16 -> 32x32 -> 64x64
for n_filters in (128, 256, 512):
    relu_layers.append(
        keras.layers.Conv2DTranspose(n_filters, kernel_size=4, strides=2, padding="same")
    )
    relu_layers.append(keras.layers.LeakyReLU(negative_slope=0.5))
# Final convolution maps to 3 channels squashed into [0, 1]
relu_layers.append(
    keras.layers.Conv2D(3, kernel_size=5, padding="same", activation="sigmoid")
)
relu_generator = keras.Sequential(relu_layers, name="relu_generator")
relu_generator.summary()

In [None]:
#The control discriminator is likewise taken from the keras example
base_disc_layers = [keras.Input(shape=(64, 64, 3))]
# Three stride-2 convolutions downsample 64x64 -> 32x32 -> 16x16 -> 8x8
for n_filters in (64, 128, 128):
    base_disc_layers.append(
        keras.layers.Conv2D(n_filters, kernel_size=4, strides=2, padding="same")
    )
    base_disc_layers.append(keras.layers.LeakyReLU(negative_slope=0.2))
# Classification head: flatten, dropout, single sigmoid unit
base_disc_layers.extend([
    keras.layers.Flatten(),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(1, activation="sigmoid"),
])
base_discriminator = keras.Sequential(base_disc_layers, name="base_discriminator")
base_discriminator.summary()

In [None]:
#The single discriminator variant tested: LeakyReLU negative slope raised from 0.2 to 0.5
slope_disc_layers = [keras.Input(shape=(64, 64, 3))]
# Three stride-2 convolutions downsample 64x64 -> 32x32 -> 16x16 -> 8x8
for n_filters in (64, 128, 128):
    slope_disc_layers.append(
        keras.layers.Conv2D(n_filters, kernel_size=4, strides=2, padding="same")
    )
    slope_disc_layers.append(keras.layers.LeakyReLU(negative_slope=0.5))
# Classification head: flatten, dropout, single sigmoid unit
slope_disc_layers.extend([
    keras.layers.Flatten(),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(1, activation="sigmoid"),
])
slope_discriminator = keras.Sequential(slope_disc_layers, name="slope_discriminator")
slope_discriminator.summary()

In [None]:
#The following code to assemble the GAN and override the training step is adapted from https://keras.io/examples/generative/dcgan_overriding_train_step/#override-trainstep
class GAN(keras.Model):
    """Pairs a generator with a discriminator and trains them adversarially.

    Label convention used throughout: 1 = generated image, 0 = real image.

    Args:
        discriminator: Model mapping a batch of images to one probability each.
        generator: Model mapping a batch of latent vectors to images.
        latent_dim: Length of the latent vectors fed to the generator.
        seed: Integer used to initialise the latent-noise seed generator.
    """

    def __init__(self, discriminator, generator, latent_dim=128, seed=42):
        super().__init__()
        self.discriminator = discriminator
        self.generator = generator
        self.latent_dim = latent_dim
        # Passing a plain integer seed to keras.random.normal yields the SAME
        # values on every call, which would train the generator on one fixed
        # set of latent points. A SeedGenerator advances its state per call
        # while remaining reproducible.
        self.seed_generator = keras.random.SeedGenerator(seed)

    def compile(self, d_optimizer, g_optimizer, loss_fn):
        """Store the two optimizers and the loss, and create the loss trackers.

        Args:
            d_optimizer: Optimizer applied to the discriminator weights.
            g_optimizer: Optimizer applied to the generator weights.
            loss_fn: Callable ``loss_fn(labels, predictions)`` used for both networks.
        """
        super().compile()
        self.d_optimizer = d_optimizer
        self.g_optimizer = g_optimizer
        self.loss_fn = loss_fn
        self.d_loss_metric = keras.metrics.Mean(name="d_loss")
        self.g_loss_metric = keras.metrics.Mean(name="g_loss")

    @property
    def metrics(self):
        """The running-mean loss trackers for the discriminator and generator."""
        return [self.d_loss_metric, self.g_loss_metric]

    def train_step(self, real_images):
        """Run one discriminator update followed by one generator update.

        Args:
            real_images: Batch of real training images.

        Returns:
            Dict with the running means of ``d_loss`` and ``g_loss``.
        """
        # Sample random points in the latent space
        batch_size = ops.shape(real_images)[0]
        random_latent_vectors = keras.random.normal(
            shape=(batch_size, self.latent_dim), seed=self.seed_generator
        )

        # Decode them to fake images
        generated_images = self.generator(random_latent_vectors)

        # Combine them with real images (generated first, real second)
        combined_images = ops.concatenate([generated_images, real_images], axis=0)

        # Assemble labels discriminating real from fake images:
        # ones for the generated half, zeros for the real half
        labels = ops.concatenate(
            [ops.ones((batch_size, 1)), ops.zeros((batch_size, 1))], axis=0
        )
        # Add random noise to the labels - important trick!
        labels += 0.05 * tf.random.uniform(tf.shape(labels))

        # Train the discriminator
        with tf.GradientTape() as tape:
            predictions = self.discriminator(combined_images)
            d_loss = self.loss_fn(labels, predictions)
        grads = tape.gradient(d_loss, self.discriminator.trainable_weights)
        self.d_optimizer.apply_gradients(
            zip(grads, self.discriminator.trainable_weights)
        )

        # Sample a fresh set of random points in the latent space
        random_latent_vectors = keras.random.normal(
            shape=(batch_size, self.latent_dim), seed=self.seed_generator
        )

        # Assemble labels that say "all real images" (real = 0 in this convention)
        misleading_labels = ops.zeros((batch_size, 1))

        # Train the generator (note that we should *not* update the weights
        # of the discriminator)!
        with tf.GradientTape() as tape:
            predictions = self.discriminator(self.generator(random_latent_vectors))
            g_loss = self.loss_fn(misleading_labels, predictions)
        grads = tape.gradient(g_loss, self.generator.trainable_weights)
        self.g_optimizer.apply_gradients(zip(grads, self.generator.trainable_weights))

        # Update metrics
        self.d_loss_metric.update_state(d_loss)
        self.g_loss_metric.update_state(g_loss)
        return {
            "d_loss": self.d_loss_metric.result(),
            "g_loss": self.g_loss_metric.result(),
        }

In [None]:
#With 2 discriminators and 3 generators there are 6 possible GANs.
#Each GAN must own its networks: passing the same model object to several GANs would let
#weights trained in one experiment carry over into the next and confound the comparison.
def build_gan(discriminator, generator):
    """Return a GAN built from fresh, independently initialised copies of both networks.

    ``keras.models.clone_model`` recreates the layers (and therefore new weights)
    from the given architectures, so the templates themselves are never trained.
    """
    return GAN(
        keras.models.clone_model(discriminator),
        keras.models.clone_model(generator),
    )


base_gan = build_gan(base_discriminator, base_generator)
g_slope_gan = build_gan(base_discriminator, relu_generator)
kernel_gan = build_gan(base_discriminator, kernel_generator)
d_slope_gan = build_gan(slope_discriminator, base_generator)
slope_gan = build_gan(slope_discriminator, relu_generator)
leaky_kernel_gan = build_gan(slope_discriminator, kernel_generator)


In [None]:
#The DCGAN paper (https://arxiv.org/pdf/1511.06434) reports 0.0002 as the optimal learning rate, so it is used for both optimizers
base_gan.compile(
    d_optimizer=keras.optimizers.Adam(learning_rate=0.0002),
    g_optimizer=keras.optimizers.Adam(learning_rate=0.0002),
    loss_fn=keras.losses.BinaryCrossentropy(),
)
history = base_gan.fit(monet_norm_data, epochs=100)

In [None]:
#Generally the loss of the discriminator stayed low, while the generator took ~25 epochs to reach a consistently low loss and then spiked a bit around 80 epochs
#Label both curves so the discriminator and generator losses can be told apart
plt.plot(history.history["d_loss"], label="d_loss (discriminator)")
plt.plot(history.history["g_loss"], label="g_loss (generator)")
plt.title("base_gan: training loss")
plt.xlabel("epoch")
plt.ylabel("mean loss")
plt.legend()
plt.show()

In [None]:
#Like the baseline, every variant is trained for 100 epochs with the same optimizer settings
g_slope_gan.compile(
    d_optimizer=keras.optimizers.Adam(learning_rate=0.0002),
    g_optimizer=keras.optimizers.Adam(learning_rate=0.0002),
    loss_fn=keras.losses.BinaryCrossentropy(),
)
history = g_slope_gan.fit(monet_norm_data, epochs=100)

In [None]:
#Generally the loss of the generator diverges for the first 30 epochs but then converges and performs better than the baseline GAN
#Label both curves so the discriminator and generator losses can be told apart
plt.plot(history.history["d_loss"], label="d_loss (discriminator)")
plt.plot(history.history["g_loss"], label="g_loss (generator)")
plt.title("g_slope_gan: training loss")
plt.xlabel("epoch")
plt.ylabel("mean loss")
plt.legend()
plt.show()

In [None]:
#Train the small-kernel generator against the control discriminator
kernel_gan.compile(
    d_optimizer=keras.optimizers.Adam(learning_rate=0.0002),
    g_optimizer=keras.optimizers.Adam(learning_rate=0.0002),
    loss_fn=keras.losses.BinaryCrossentropy(),
)
history = kernel_gan.fit(monet_norm_data, epochs=100)

In [None]:
#Both losses seemed to stay fairly low, so it is unclear how well the generator learned
#Label both curves so the discriminator and generator losses can be told apart
plt.plot(history.history["d_loss"], label="d_loss (discriminator)")
plt.plot(history.history["g_loss"], label="g_loss (generator)")
plt.title("kernel_gan: training loss")
plt.xlabel("epoch")
plt.ylabel("mean loss")
plt.legend()
plt.show()

In [None]:
#Train the control generator against the steeper-slope discriminator
d_slope_gan.compile(
    d_optimizer=keras.optimizers.Adam(learning_rate=0.0002),
    g_optimizer=keras.optimizers.Adam(learning_rate=0.0002),
    loss_fn=keras.losses.BinaryCrossentropy(),
)
history = d_slope_gan.fit(monet_norm_data, epochs=100)

In [None]:
#From the plot the generator had trouble fooling the discriminator
#Label both curves so the discriminator and generator losses can be told apart
plt.plot(history.history["d_loss"], label="d_loss (discriminator)")
plt.plot(history.history["g_loss"], label="g_loss (generator)")
plt.title("d_slope_gan: training loss")
plt.xlabel("epoch")
plt.ylabel("mean loss")
plt.legend()
plt.show()

In [None]:
#Train the steeper-slope generator against the steeper-slope discriminator
slope_gan.compile(
    d_optimizer=keras.optimizers.Adam(learning_rate=0.0002),
    g_optimizer=keras.optimizers.Adam(learning_rate=0.0002),
    loss_fn=keras.losses.BinaryCrossentropy(),
)
history = slope_gan.fit(monet_norm_data, epochs=100)

In [None]:
#Loss curves for the most recent training run, labelled so the two networks can be told apart
plt.plot(history.history["d_loss"], label="d_loss (discriminator)")
plt.plot(history.history["g_loss"], label="g_loss (generator)")
plt.title("slope_gan: training loss")
plt.xlabel("epoch")
plt.ylabel("mean loss")
plt.legend()
plt.show()

In [None]:
#Train the small-kernel generator against the steeper-slope discriminator
leaky_kernel_gan.compile(
    d_optimizer=keras.optimizers.Adam(learning_rate=0.0002),
    g_optimizer=keras.optimizers.Adam(learning_rate=0.0002),
    loss_fn=keras.losses.BinaryCrossentropy(),
)
history = leaky_kernel_gan.fit(monet_norm_data, epochs=100)

In [None]:
#Loss curves for the most recent training run, labelled so the two networks can be told apart
plt.plot(history.history["d_loss"], label="d_loss (discriminator)")
plt.plot(history.history["g_loss"], label="g_loss (generator)")
plt.title("leaky_kernel_gan: training loss")
plt.xlabel("epoch")
plt.ylabel("mean loss")
plt.legend()
plt.show()

In [None]:
#To compare the GANs visually, every generator is given the same random latent vector
random_latent_vector = keras.random.normal(
    seed=42,
    shape=(1, 128),
)


In [None]:
#Image produced by the baseline GAN's generator; titled so the six samples can be told apart
base_imgs = base_gan.generator.predict(random_latent_vector)
plt.imshow(base_imgs[0])
plt.title("base_gan")
plt.axis("off")
plt.show()

In [None]:
#Image produced by the g_slope GAN's generator; titled so the six samples can be told apart
g_slope_imgs = g_slope_gan.generator.predict(random_latent_vector)
plt.imshow(g_slope_imgs[0])
plt.title("g_slope_gan")
plt.axis("off")
plt.show()

In [None]:
#Image produced by the kernel GAN's generator; titled so the six samples can be told apart
kernel_imgs = kernel_gan.generator.predict(random_latent_vector)
plt.imshow(kernel_imgs[0])
plt.title("kernel_gan")
plt.axis("off")
plt.show()

In [None]:
#Image produced by the d_slope GAN's generator; titled so the six samples can be told apart
d_slope_img = d_slope_gan.generator.predict(random_latent_vector)
plt.imshow(d_slope_img[0])
plt.title("d_slope_gan")
plt.axis("off")
plt.show()

In [None]:
#Image produced by the slope GAN's generator; titled so the six samples can be told apart
slope_img = slope_gan.generator.predict(random_latent_vector)
plt.imshow(slope_img[0])
plt.title("slope_gan")
plt.axis("off")
plt.show()

In [None]:
#Image produced by the leaky_kernel GAN's generator; titled so the six samples can be told apart
leaky_kernel_img = leaky_kernel_gan.generator.predict(random_latent_vector)
plt.imshow(leaky_kernel_img[0])
plt.title("leaky_kernel_gan")
plt.axis("off")
plt.show()

In [None]:
#Based on the results above the 'slope' GAN performed best; modifying the discriminator appeared to have little effect
final_GAN = slope_gan
#Draw 8000 latent vectors and decode each one into an image for the submission
submission_latent_vector = keras.random.normal(
    shape=(8000, 128),
    seed=42,
)
final_generator = final_GAN.generator
submission_imgs = final_generator.predict(submission_latent_vector)

In [None]:
#Rescale the generator's sigmoid outputs from [0, 1] to [0, 255].
#The scaling is done in place, so the guard keeps a re-run of this cell from multiplying by 255 a second time.
if submission_imgs.max() <= 1.0:
    submission_imgs *= 255
#A bare expression in the middle of a cell is never displayed, so print the shape explicitly
print(submission_imgs.shape)
submission_imgs[56]

In [None]:
#The Kaggle submission guidelines require 256x256 images, so upscale the whole batch
kaggle_target_size = [256, 256]
kaggle_imgs = tf.image.resize(submission_imgs, kaggle_target_size)

In [None]:
#Encode every generated image as a JPEG in memory and add it to the submission archive
with zipfile.ZipFile("/kaggle/working/images.zip", "w") as zip_file:
    for i, img in enumerate(kaggle_imgs):
        # Round and clip before the uint8 cast: a bare astype truncates the fraction
        # and would wrap around for any value outside [0, 255]
        pixels = np.clip(np.rint(img.numpy()), 0, 255).astype(np.uint8)
        temp = BytesIO()
        Image.fromarray(pixels).save(temp, format="jpeg")
        # getvalue() returns the whole buffer regardless of the current position
        zip_file.writestr(f"Generated_Image_{i}.jpg", temp.getvalue())

In conclusion, modifications to the discriminator had little effect on the results. For the generator, there was a performance boost from making the negative slope of the LeakyReLU larger and from making the filter size smaller. Even the GAN with the smallest generator loss was still producing a cloudy image. The next step would be to look into longer training of the GANs, as well as utilizing an encoder to convert the photos into the latent space instead of relying on random vectors.

References:
1. Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks https://arxiv.org/abs/1511.06434.
2. DCGAN to generate face images https://keras.io/examples/generative/dcgan_overriding_train_step/#train-the-endtoend-model