In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file below the input directory,
# then print the paths one per line.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Brief Description of Problem

In this assignment we will try to use a GAN (generative adversarial network) to create fake images of dogs. During EDA I will look at the size/shape of the data and at any cleaning steps it needs.  This challenge was Kaggle's original introduction to GANs until the competition closed recently.


# EDA

Let's load the data and take a look at a few sample pictures.  I also want to look at the annotations file and see whether it contains anything needed for this assignment.

In [None]:
import zipfile

Dataset = "all-dogs"

# Extract the archive into the current working directory so the individual
# image files can be browsed.
dogs_archive_path = "../input/generative-dog-images/" + Dataset + ".zip"
with zipfile.ZipFile(dogs_archive_path, "r") as archive:
    archive.extractall(".")

In [None]:
# Directory the "all-dogs" archive was extracted into.
PATH = '/kaggle/working/all-dogs/'
# Bare file names (not full paths) of every entry in that directory.
images = os.listdir(PATH)
dog_count = len(images)
print(f'There are {dog_count} pictures of dogs.')

In [None]:
import matplotlib.pyplot as plt

# List the image directory exactly once. The original re-listed it on every
# loop iteration just to obtain its length, and indexed the global `images`,
# which a later cell rebinds to the annotation listing; using a local listing
# keeps this cell correct even when it is re-run afterwards.
sample_files = os.listdir(PATH)

fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(12, 10))

for axis in axes.flatten():
    # Draw one file uniformly at random (the same picture may appear twice).
    file_name = sample_files[np.random.randint(0, len(sample_files))]
    axis.imshow(plt.imread(os.path.join(PATH, file_name)))
    axis.set_title(file_name)
    axis.set_axis_off()
# Restrict the layout to a rectangle that leaves a small top/bottom margin.
plt.tight_layout(rect=[0, 0.03, 1, 0.95])

So, we see a few example pictures of the dogs.  These all appear to be fairly high quality images.  Also, some pictures include humans and other objects.

Given that these are high quality images, I want to see what the pixel dimensions of each picture are and whether they are consistent across the dataset.

In [None]:
imgs = os.listdir(PATH)

# Collect the distinct array shapes: every file is decoded with plt.imread and
# its .shape tuple goes into a set, so duplicates collapse automatically.
img_shapes = {plt.imread(PATH + file_name).shape for file_name in imgs}

len(img_shapes)




So, we see there are over 4500 different image shapes.  This would be a problem if we wanted to use the raw images, since they come in many different sizes.  The best way to handle this is to convert all images to a standard size.  I will initially use 64x64 images (strictly speaking 64x64x3, because of the three RGB color channels).

Next, let's look at the annotation file.

In [None]:
import zipfile

Dataset = "Annotation"

# Extract the annotation archive into the current working directory so its
# contents can be inspected.
annotation_archive_path = "../input/generative-dog-images/" + Dataset + ".zip"
with zipfile.ZipFile(annotation_archive_path, "r") as archive:
    archive.extractall(".")


PATH_2 = '/kaggle/working/Annotation/'
# NOTE: this rebinds `images`, which until now held the dog picture file names.
images = os.listdir(PATH_2)
len(images)

In [None]:
# Print five randomly chosen entries of the annotation listing.
# The random index is drawn from len(images) -- the list that is actually
# indexed -- instead of re-listing the directory on every iteration, which was
# both wasteful and could go out of range if the two listings ever differed.
for _ in range(5):
    rnd_indx = np.random.randint(0, len(images))
    print(images[rnd_indx])

We see these are just the dog breeds.  I will not be using them for this assignment.  They may have been necessary in the original competition, but it is closed now, so I can't submit anything anyway.

# Data Preprocessing and Model Architecture

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import os

In [None]:
def _scale_batch_64(batch):
    """Divide every pixel value in the batch by 255."""
    return batch / 255.0


# Unlabelled batches of 32 images, each resized to 64x64 while loading.
_raw_batches_64 = keras.preprocessing.image_dataset_from_directory(
    PATH,
    label_mode=None,
    image_size=(64, 64),
    batch_size=32,
)
dataset_reshaped = _raw_batches_64.map(_scale_batch_64)

In [None]:

# Show the first image of the first batch; take(1) stops after one batch.
for first_batch in dataset_reshaped.take(1):
    plt.axis("off")
    # Undo the /255 scaling so the pixels are integers again for display.
    first_image = (first_batch.numpy() * 255).astype("int32")[0]
    plt.imshow(first_image)

So, I have resized each image to 64x64x3.  This gives a consistent input shape to use in our model.

Next, I will create the discriminator.  I struggled to develop an architecture of my own that worked in any sort of timely fashion, so I initially used the architecture seen in our lecture and found on the Keras tutorial page <https://keras.io/examples/generative/dcgan_overriding_train_step/#dcgan-to-generate-face-images>

# Model Architecture

In [None]:
# Assemble the layer stack programmatically: three stride-2 convolutions,
# each followed by a LeakyReLU activation.
_disc_layers_64 = [keras.Input(shape=(64, 64, 3))]
for n_filters in (64, 128, 128):
    _disc_layers_64.append(
        layers.Conv2D(n_filters, kernel_size=4, strides=2, padding="same")
    )
    _disc_layers_64.append(layers.LeakyReLU(alpha=0.2))
# Classification head: flatten, dropout, then a single sigmoid unit.
_disc_layers_64 += [
    layers.Flatten(),
    layers.Dropout(0.2),
    layers.Dense(1, activation="sigmoid"),
]

discriminator = keras.Sequential(_disc_layers_64, name="discriminator")
discriminator.summary()

And now for the generator model.

In [None]:
latent_dim = 128

# Project the latent vector to an 8x8x128 feature map ...
_gen_layers_64 = [
    keras.Input(shape=(latent_dim,)),
    layers.Dense(8 * 8 * 128),
    layers.Reshape((8, 8, 128)),
]
# ... upsample it with three stride-2 transposed convolutions ...
for n_filters in (128, 256, 512):
    _gen_layers_64.extend(
        [
            layers.Conv2DTranspose(n_filters, kernel_size=4, strides=2, padding="same"),
            layers.LeakyReLU(alpha=0.2),
        ]
    )
# ... and map to 3 output channels through a sigmoid.
_gen_layers_64.append(
    layers.Conv2D(3, kernel_size=5, padding="same", activation="sigmoid")
)

generator = keras.Sequential(_gen_layers_64, name="generator")
generator.summary()

Finally, I used the example code to combine the models, making one simple adjustment: I removed the code that adds random noise to the labels.  While this is described as an important trick, it seemed to make my models extremely unstable and made it seemingly impossible for them to learn.  Perhaps this step would be useful on more standardized datasets (like CIFAR-10), but the fact that the images we are given often have other objects in them seemed to create too much confusion when random noise was applied to the true labels.

In [None]:
class GAN(keras.Model):
    """Keras model that trains a generator and a discriminator against each other.

    The adversarial logic lives in a custom `train_step`, which performs one
    discriminator update followed by one generator update per batch.

    Label convention used throughout this class: generated (fake) images are
    labelled 1 and real images are labelled 0.
    """

    def __init__(self, discriminator, generator, latent_dim):
        """Store the two sub-models and the length of the latent vector.

        Args:
            discriminator: Model called on image batches to produce predictions.
            generator: Model called on latent vectors to produce images.
            latent_dim: Size of each random latent vector fed to the generator.
        """
        super(GAN, self).__init__()
        self.discriminator = discriminator
        self.generator = generator
        self.latent_dim = latent_dim

    def compile(self, d_optimizer, g_optimizer, loss_fn):
        """Attach one optimizer per sub-model, the loss, and two loss trackers.

        Args:
            d_optimizer: Optimizer applied to the discriminator's weights.
            g_optimizer: Optimizer applied to the generator's weights.
            loss_fn: Callable invoked as ``loss_fn(labels, predictions)``.
        """
        # The base compile() is called without arguments; the optimizers and the
        # loss are kept on the instance and used directly in train_step.
        super(GAN, self).compile()
        self.d_optimizer = d_optimizer
        self.g_optimizer = g_optimizer
        self.loss_fn = loss_fn
        # Running means of the per-batch losses, reported by train_step.
        self.d_loss_metric = keras.metrics.Mean(name="d_loss")
        self.g_loss_metric = keras.metrics.Mean(name="g_loss")

    @property
    def metrics(self):
        """Return the two loss trackers (discriminator first, then generator)."""
        # NOTE(review): presumably exposed so Keras resets them between epochs --
        # confirm against the Keras version in use.
        return [self.d_loss_metric, self.g_loss_metric]

    def train_step(self, real_images):
        """Run one discriminator update and one generator update on a batch.

        Args:
            real_images: Batch of real images; its first dimension is used as
                the batch size.

        Returns:
            Dict with the current running-mean values under the keys
            ``"d_loss"`` and ``"g_loss"``.
        """
        # Sample random points in the latent space
        batch_size = tf.shape(real_images)[0]
        random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim))

        # Decode them to fake images
        # (done outside any GradientTape, so this pass does not train the generator)
        generated_images = self.generator(random_latent_vectors)

        # Combine them with real images (generated images first, real images second)
        combined_images = tf.concat([generated_images, real_images], axis=0)

        # Assemble labels discriminating real from fake images:
        # ones for the generated half, zeros for the real half (same order as above)
        labels = tf.concat(
            [tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0
        )
        # Label noise from the reference example, intentionally left disabled
        # in this notebook:
        #labels += 0.05 * tf.random.uniform(tf.shape(labels))

        # Train the discriminator
        with tf.GradientTape() as tape:
            predictions = self.discriminator(combined_images)
            d_loss = self.loss_fn(labels, predictions)
        grads = tape.gradient(d_loss, self.discriminator.trainable_weights)
        self.d_optimizer.apply_gradients(
            zip(grads, self.discriminator.trainable_weights)
        )

        # Sample random points in the latent space
        # (a fresh draw, independent of the one used for the discriminator step)
        random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim))

        # Assemble labels that say "all real images"
        # (0 is the label given to real images above)
        misleading_labels = tf.zeros((batch_size, 1))

        # Train the generator (note that we should *not* update the weights
        # of the discriminator)!
        with tf.GradientTape() as tape:
            predictions = self.discriminator(self.generator(random_latent_vectors))
            g_loss = self.loss_fn(misleading_labels, predictions)
        # Gradients are taken w.r.t. the generator's weights only, so the
        # discriminator is left untouched by this update.
        grads = tape.gradient(g_loss, self.generator.trainable_weights)
        self.g_optimizer.apply_gradients(zip(grads, self.generator.trainable_weights))

        
        # Update metrics
        self.d_loss_metric.update_state(d_loss)
        self.g_loss_metric.update_state(g_loss)
        return {
            "d_loss": self.d_loss_metric.result(),
            "g_loss": self.g_loss_metric.result(),
        }

I also added a callback to the code that shows a few generated images after each epoch, so we can see how the model is improving.  This is interesting, but the first few epochs probably won't show much learning.

In [None]:
class GANMonitor(keras.callbacks.Callback):
    """Keras callback that displays freshly generated images after every epoch.

    Args:
        num_img: Number of images to generate and show at the end of each epoch.
        latent_dim: Length of the random latent vectors fed to the generator.
    """

    def __init__(self, num_img=3, latent_dim=128):
        # Let the base Callback set up its own attributes first.
        super().__init__()
        self.num_img = num_img
        self.latent_dim = latent_dim

    def on_epoch_end(self, epoch, logs=None):
        """Generate `num_img` images from random latent vectors and show them.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric results for the epoch (unused).
        """
        random_latent_vectors = tf.random.normal(shape=(self.num_img, self.latent_dim))
        generated_images = self.model.generator(random_latent_vectors)
        # Scale by 255 and keep the NumPy conversion; the original called
        # .numpy() but discarded its result.
        generated_images = (generated_images * 255).numpy()
        for i in range(self.num_img):
            img = keras.preprocessing.image.array_to_img(generated_images[i])
            # Select the target figure BEFORE drawing. The original drew first
            # and switched figures afterwards, so the first two images landed
            # on the same figure and the last figure was left empty.
            plt.figure(i + 1)
            plt.imshow(img)
        plt.show()

In [None]:
epochs = 5

# Training ingredients: one Adam optimizer per network (same learning rate),
# binary cross-entropy as the loss, and the image-preview callback.
d_opt = keras.optimizers.Adam(learning_rate=0.0001)
g_opt = keras.optimizers.Adam(learning_rate=0.0001)
bce_loss = keras.losses.BinaryCrossentropy()
preview_callback = GANMonitor(num_img=10, latent_dim=latent_dim)

gan = GAN(discriminator=discriminator, generator=generator, latent_dim=latent_dim)
gan.compile(d_optimizer=d_opt, g_optimizer=g_opt, loss_fn=bce_loss)

gan.fit(dataset_reshaped, epochs=epochs, callbacks=[preview_callback])

As expected, you can't really determine much from the output of the first few epochs.  It is mostly just pixelated blobs.

In [None]:
class GANMonitor(keras.callbacks.Callback):
    """Keras callback that redraws preview images each epoch without showing them.

    Unlike a variant that calls ``plt.show()`` after every epoch, this one only
    updates figures 1..num_img in place, so the images appear once, when the
    figures are finally rendered after all epochs are complete.

    Args:
        num_img: Number of images (and figures) to maintain.
        latent_dim: Length of the random latent vectors fed to the generator.
    """

    def __init__(self, num_img=3, latent_dim=128):
        # Let the base Callback set up its own attributes first.
        super().__init__()
        self.num_img = num_img
        self.latent_dim = latent_dim

    def on_epoch_end(self, epoch, logs=None):
        """Generate `num_img` images and draw each onto its own reusable figure.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric results for the epoch (unused).
        """
        random_latent_vectors = tf.random.normal(shape=(self.num_img, self.latent_dim))
        generated_images = self.model.generator(random_latent_vectors)
        # Scale by 255 and keep the NumPy conversion; the original called
        # .numpy() but discarded its result.
        generated_images = (generated_images * 255).numpy()
        for i in range(self.num_img):
            img = keras.preprocessing.image.array_to_img(generated_images[i])
            # Select figure i+1 BEFORE drawing (the original drew first, which
            # shifted every image onto the previous figure), and clear it so
            # images from earlier epochs do not pile up on the same axes.
            plt.figure(i + 1)
            plt.clf()
            plt.imshow(img)
        # plt.show() is deliberately not called here, so the images are only
        # displayed once all epochs are complete.

In [None]:
epochs = 50

# Wrap the existing `discriminator` and `generator` objects in a new GAN
# and compile it with fresh Adam optimizers.
compile_settings = dict(
    d_optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    g_optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    loss_fn=keras.losses.BinaryCrossentropy(),
)
gan = GAN(discriminator=discriminator, generator=generator, latent_dim=latent_dim)
gan.compile(**compile_settings)

preview_callback = GANMonitor(num_img=10, latent_dim=latent_dim)
gan.fit(dataset_reshaped, epochs=epochs, callbacks=[preview_callback])

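After 50 more epochs (the same generator and discriminator objects are reused, so this training continues on top of the 5 epochs above), we are starting to see some more defined shapes forming.  This is interesting, but we still cannot make out much.  Lower-resolution images should train faster, so I will experiment with 32x32x3 images and many more epochs.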

In [None]:
def _scale_batch_32(batch):
    """Divide every pixel value in the batch by 255."""
    return batch / 255.0


# Same loading as before, but resizing every image to 32x32.
_raw_batches_32 = keras.preprocessing.image_dataset_from_directory(
    PATH,
    label_mode=None,
    image_size=(32, 32),
    batch_size=32,
)
dataset_reshaped_LO = _raw_batches_32.map(_scale_batch_32)

In [None]:
# Show the first image of the first low-resolution batch.
for first_batch_lo in dataset_reshaped_LO.take(1):
    plt.axis("off")
    # Undo the /255 scaling so the pixels are integers again for display.
    first_image_lo = (first_batch_lo.numpy() * 255).astype("int32")[0]
    plt.imshow(first_image_lo)

We can see that the image quality is significantly worse.

I will use a similar model setup to train on the lower-quality image dataset.

In [None]:
# Discriminator for 32x32x3 inputs: four stride-2 convolutions, each followed
# by a LeakyReLU, then a dropout-regularised single-unit sigmoid head.
_disc_filters_32 = (64, 128, 128, 256)

_disc_layers_32 = [keras.Input(shape=(32, 32, 3))]
for n_filters in _disc_filters_32:
    _disc_layers_32.append(
        layers.Conv2D(n_filters, kernel_size=4, strides=2, padding="same")
    )
    _disc_layers_32.append(layers.LeakyReLU(alpha=0.2))
_disc_layers_32.append(layers.Flatten())
_disc_layers_32.append(layers.Dropout(0.2))
_disc_layers_32.append(layers.Dense(1, activation="sigmoid"))

discriminator = keras.Sequential(_disc_layers_32, name="discriminator")
discriminator.summary()

In [None]:
latent_dim = 256

# Generator for 32x32x3 outputs: project the latent vector to a 4x4x256
# feature map, upsample with three stride-2 transposed convolutions, and map
# to 3 channels through a sigmoid.
_gen_filters_32 = (256, 256, 512)

_gen_layers_32 = [
    keras.Input(shape=(latent_dim,)),
    layers.Dense(4 * 4 * 256),
    layers.Reshape((4, 4, 256)),
]
for n_filters in _gen_filters_32:
    _gen_layers_32.append(
        layers.Conv2DTranspose(n_filters, kernel_size=4, strides=2, padding="same")
    )
    _gen_layers_32.append(layers.LeakyReLU(alpha=0.2))
_gen_layers_32.append(
    layers.Conv2D(3, kernel_size=5, padding="same", activation="sigmoid")
)

generator = keras.Sequential(_gen_layers_32, name="generator")
generator.summary()

In [None]:
epochs = 100

# Train the 32x32 networks: separate Adam optimizers for each network and
# binary cross-entropy as the adversarial loss.
lo_d_opt = keras.optimizers.Adam(learning_rate=0.0001)
lo_g_opt = keras.optimizers.Adam(learning_rate=0.0001)
lo_loss = keras.losses.BinaryCrossentropy()

gan = GAN(discriminator=discriminator, generator=generator, latent_dim=latent_dim)
gan.compile(d_optimizer=lo_d_opt, g_optimizer=lo_g_opt, loss_fn=lo_loss)

lo_preview = GANMonitor(num_img=10, latent_dim=latent_dim)
gan.fit(dataset_reshaped_LO, epochs=epochs, callbacks=[lo_preview])

So, after 100 epochs the images aren't particularly clear, though we are starting to see some differentiation between the images (different background colors and different shapes are beginning to form).  I will now train for 200 more epochs; the networks are not re-created, so this continues from the weights learned above.

In [None]:
epochs = 200

# Re-wrap the current `discriminator` and `generator` objects in a new GAN
# with fresh optimizers and train on the 32x32 dataset.
compile_settings_lo = {
    "d_optimizer": keras.optimizers.Adam(learning_rate=0.0001),
    "g_optimizer": keras.optimizers.Adam(learning_rate=0.0001),
    "loss_fn": keras.losses.BinaryCrossentropy(),
}
gan = GAN(discriminator=discriminator, generator=generator, latent_dim=latent_dim)
gan.compile(**compile_settings_lo)

lo_preview_long = GANMonitor(num_img=10, latent_dim=latent_dim)
gan.fit(dataset_reshaped_LO, epochs=epochs, callbacks=[lo_preview_long])

We are starting to see some images taking shape.  In the third image above in particular, we can see a four-legged animal beginning to appear.  This is interesting, and it suggests that we likely need not only more epochs of training but also a more sophisticated model.  I will next return to the higher quality dataset with more epochs to see whether the produced images become any clearer.  Unfortunately, after lots of trial and error, I am running out of my allotted time on the Kaggle GPUs, so I am only running for 75 epochs (I would have preferred to try ~150-200 epochs).  I will have to revisit this next week after the weekly allowance resets.

In [None]:
# Fresh (untrained) discriminator for 64x64x3 inputs.
_conv_act_pairs = [
    (
        layers.Conv2D(n_filters, kernel_size=4, strides=2, padding="same"),
        layers.LeakyReLU(alpha=0.2),
    )
    for n_filters in (64, 128, 128)
]

_disc_layers_hi = [keras.Input(shape=(64, 64, 3))]
for conv_layer, activation_layer in _conv_act_pairs:
    _disc_layers_hi += [conv_layer, activation_layer]
# Classification head: flatten, dropout, then a single sigmoid unit.
_disc_layers_hi += [
    layers.Flatten(),
    layers.Dropout(0.2),
    layers.Dense(1, activation="sigmoid"),
]

discriminator = keras.Sequential(_disc_layers_hi, name="discriminator")
discriminator.summary()

In [None]:
latent_dim = 128

# Fresh (untrained) generator for 64x64x3 outputs.
_gen_stem_hi = [
    keras.Input(shape=(latent_dim,)),
    layers.Dense(8 * 8 * 128),
    layers.Reshape((8, 8, 128)),
]

# Three stride-2 transposed convolutions, each followed by a LeakyReLU.
_gen_upsampling_hi = []
for n_filters in (128, 256, 512):
    _gen_upsampling_hi.append(
        layers.Conv2DTranspose(n_filters, kernel_size=4, strides=2, padding="same")
    )
    _gen_upsampling_hi.append(layers.LeakyReLU(alpha=0.2))

# Final convolution maps to 3 output channels through a sigmoid.
_gen_head_hi = [layers.Conv2D(3, kernel_size=5, padding="same", activation="sigmoid")]

generator = keras.Sequential(
    _gen_stem_hi + _gen_upsampling_hi + _gen_head_hi,
    name="generator",
)
generator.summary()

In [None]:
epochs = 75

# Train the re-created 64x64 networks under a separate name (`gan2`).
hi_d_opt = keras.optimizers.Adam(learning_rate=0.0001)
hi_g_opt = keras.optimizers.Adam(learning_rate=0.0001)
hi_loss = keras.losses.BinaryCrossentropy()

gan2 = GAN(discriminator=discriminator, generator=generator, latent_dim=latent_dim)
gan2.compile(d_optimizer=hi_d_opt, g_optimizer=hi_g_opt, loss_fn=hi_loss)

hi_preview = GANMonitor(num_img=10, latent_dim=latent_dim)
gan2.fit(dataset_reshaped, epochs=epochs, callbacks=[hi_preview])

Again, we continue to see images starting to appear, but they are still quite blurry.  Luckily, Kaggle granted me an extension of a few hours of GPU usage for this notebook, so I am able to continue training this model for another 100 epochs.

In [None]:
# Keep training the already-compiled `gan2` for another 100 epochs.
extra_preview = GANMonitor(num_img=10, latent_dim=latent_dim)
gan2.fit(dataset_reshaped, epochs=100, callbacks=[extra_preview])

# Results and Conclusion

So, we can see that dogs are beginning to appear in these images.  The model was very slow to train, and it's not very likely anyone would mistake these images for real dogs, but this is the beginning of a promising model nonetheless.

Unfortunately, I cannot submit this work to the competition since it is closed, but it likely needs more work before being ready for submission anyway.

I tested several epoch counts as well as different models and different initial image sizes/qualities.  I definitely have more work to do to understand and use GANs for image generation, but this does seem like a promising start!