In [1]:
# Import data and reduce size of dataset to allow flexibility in improving performance through hyper-parameter value changes
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import scipy
from scipy.linalg import sqrtm
import time

# Reduce dataset size
def reduce_dataset(dataset, num_samples=10000):
    return dataset.take(num_samples)

# Load dataset
(train_images, _), (_, _) = tf.keras.datasets.mnist.load_data()
train_images = train_images.reshape(train_images.shape[0], 28, 28, 1).astype('float32')
train_images = (train_images - 127.5) / 127.5
train_dataset = tf.data.Dataset.from_tensor_slices(train_images)
reduced_train_dataset = reduce_dataset(train_dataset)

In [2]:
"""
Set up to run the baseline model on reduced dataset with same hyperparameter values

"""
# Generator model definition
def make_generator_model():
    model = tf.keras.Sequential([
        layers.Input(shape=(100,)),  # Explicitly define input shape
        layers.Dense(7 * 7 * 256, use_bias=False),
        layers.BatchNormalization(),
        layers.LeakyReLU(),
        layers.Reshape((7, 7, 256)),
        layers.Conv2DTranspose(128, (5, 5), strides=(1, 1), padding='same', use_bias=False),
        layers.BatchNormalization(),
        layers.LeakyReLU(),
        layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', use_bias=False),
        layers.BatchNormalization(),
        layers.LeakyReLU(),
        layers.Conv2DTranspose(1, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh')
    ])
    return model

# Discriminator model definition
def make_discriminator_model():
    model = tf.keras.Sequential([
        layers.Input(shape=(28, 28, 1)),  # Explicitly define input shape
        layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same'),
        layers.LeakyReLU(),
        layers.Dropout(0.3),
        layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same'),
        layers.LeakyReLU(),
        layers.Dropout(0.3),
        layers.Flatten(),
        layers.Dense(1)
    ])
    return model


def calculate_fid(real_images, generated_images):
    # Ensure real_images and generated_images are in correct shape
    if real_images.shape[-1] != 3:
        real_images = tf.image.grayscale_to_rgb(real_images)  # Convert grayscale to RGB
    if generated_images.shape[-1] != 3:
        generated_images = tf.image.grayscale_to_rgb(generated_images)  # Convert grayscale to RGB

    # Resize images to (75, 75, 3)
    real_resized = tf.image.resize(real_images, (75, 75))
    generated_resized = tf.image.resize(generated_images, (75, 75))

    # Extract features using InceptionV3
    inception_model = tf.keras.applications.InceptionV3(include_top=False, pooling='avg', input_shape=(75, 75, 3))
    real_act = inception_model.predict(real_resized, verbose=0)
    generated_act = inception_model.predict(generated_resized, verbose=0)

    # Calculate mean and covariance
    mu_real, sigma_real = np.mean(real_act, axis=0), np.cov(real_act, rowvar=False)
    mu_generated, sigma_generated = np.mean(generated_act, axis=0), np.cov(generated_act, rowvar=False)

    # Calculate FID score
    diff = mu_real - mu_generated
    covmean = scipy.linalg.sqrtm(sigma_real.dot(sigma_generated), disp=False)[0]

    # Numerical stability check
    if np.iscomplexobj(covmean):
        covmean = covmean.real

    fid_score = diff.dot(diff) + np.trace(sigma_real + sigma_generated - 2 * covmean)
    return fid_score

# Training function
def train_and_evaluate(train_dataset, epochs, noise_dim, batch_size, generator_fn, discriminator_fn, fid_batch_size=1000):
    generator = generator_fn()
    discriminator = discriminator_fn()

    generator_optimizer = tf.keras.optimizers.Adam(1e-4)
    discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)

    for epoch in range(epochs):
        for real_images in train_dataset.batch(batch_size):  # Ensure correct batching
            noise = tf.random.normal([batch_size, noise_dim])

            with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
                generated_images = generator(noise, training=True)
                real_output = discriminator(real_images, training=True)
                fake_output = discriminator(generated_images, training=True)

                gen_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)(tf.ones_like(fake_output), fake_output)
                disc_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)(tf.ones_like(real_output), real_output) + \
                            tf.keras.losses.BinaryCrossentropy(from_logits=True)(tf.zeros_like(fake_output), fake_output)

            gradients_gen = gen_tape.gradient(gen_loss, generator.trainable_variables)
            gradients_disc = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

            generator_optimizer.apply_gradients(zip(gradients_gen, generator.trainable_variables))
            discriminator_optimizer.apply_gradients(zip(gradients_disc, discriminator.trainable_variables))

    # Calculate FID score
    fid_noise = tf.random.normal([fid_batch_size, noise_dim])
    generated_images = generator(fid_noise, training=False)

    # Ensure real_images has a batch dimension
    real_images = tf.concat([batch for batch in train_dataset.take(fid_batch_size // batch_size)], axis=0)
    if len(real_images.shape) == 3:  # Add batch dimension if missing
        real_images = tf.expand_dims(real_images, axis=0)

    # Convert to RGB and resize
    real_images_resized = tf.image.grayscale_to_rgb(real_images)
    real_images_resized = tf.image.resize(real_images_resized, (75, 75))

    # Ensure real_images_resized has a batch dimension
    if len(real_images_resized.shape) == 3:  # Add batch dimension if missing
        real_images_resized = tf.expand_dims(real_images_resized, axis=0)

    # Prepare generated images
    generated_images = generator(tf.random.normal([fid_batch_size, noise_dim]), training=False)
    if len(generated_images.shape) == 3:  # Add batch dimension if missing
        generated_images = tf.expand_dims(generated_images, axis=0)

    # Convert to RGB and resize
    generated_images_resized = tf.image.grayscale_to_rgb(generated_images)
    generated_images_resized = tf.image.resize(generated_images_resized, (75, 75))

    print("Shape of real_images_resized:", real_images_resized.shape)
    print("Shape of generated_images_resized:", generated_images_resized.shape)

    # Calculate FID score
    fid_score = calculate_fid(real_images_resized, generated_images_resized)
    return fid_score


In [3]:
# Run baseline model and output FID score
baseline_fid = train_and_evaluate(reduced_train_dataset, epochs=5, noise_dim=100, batch_size=64,
                                  generator_fn=make_generator_model, discriminator_fn=make_discriminator_model)
print(f"Baseline FID: {baseline_fid}")


"""
Baseline FID: 2409.122352152087

"""

Shape of real_images_resized: (1, 75, 75, 3)
Shape of generated_images_resized: (1000, 75, 75, 3)
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 0us/step
Baseline FID: 2339.4321782401666


'\nBaseline FID: 2409.122352152087\n\n'

In [None]:
"""
create model and functions with slightly different setup to handle varied hyper-parameter values
"""

# Generator model definition with variable noise_dim
def make_generator_model(noise_dim=100):
    model = tf.keras.Sequential([
        layers.Input(shape=(noise_dim,)),  # Use the noise_dim parameter here
        layers.Dense(7 * 7 * 256, use_bias=False),
        layers.BatchNormalization(),
        layers.LeakyReLU(),
        layers.Reshape((7, 7, 256)),
        layers.Conv2DTranspose(128, (5, 5), strides=(1, 1), padding='same', use_bias=False),
        layers.BatchNormalization(),
        layers.LeakyReLU(),
        layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', use_bias=False),
        layers.BatchNormalization(),
        layers.LeakyReLU(),
        layers.Conv2DTranspose(1, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh')
    ])
    return model

# Discriminator model definition
def make_discriminator_model():
    model = tf.keras.Sequential([
        layers.Input(shape=(28, 28, 1)),  # Explicitly define input shape
        layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same'),
        layers.LeakyReLU(),
        layers.Dropout(0.3),
        layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same'),
        layers.LeakyReLU(),
        layers.Dropout(0.3),
        layers.Flatten(),
        layers.Dense(1)
    ])
    return model


def calculate_fid(real_images, generated_images):
    # Ensure real_images and generated_images are in correct shape
    if real_images.shape[-1] != 3:
        real_images = tf.image.grayscale_to_rgb(real_images)  # Convert grayscale to RGB
    if generated_images.shape[-1] != 3:
        generated_images = tf.image.grayscale_to_rgb(generated_images)  # Convert grayscale to RGB

    # Resize images to (75, 75, 3)
    real_resized = tf.image.resize(real_images, (75, 75))
    generated_resized = tf.image.resize(generated_images, (75, 75))

    # Extract features using InceptionV3
    inception_model = tf.keras.applications.InceptionV3(include_top=False, pooling='avg', input_shape=(75, 75, 3))
    real_act = inception_model.predict(real_resized, verbose=0)
    generated_act = inception_model.predict(generated_resized, verbose=0)

    # Calculate mean and covariance
    mu_real, sigma_real = np.mean(real_act, axis=0), np.cov(real_act, rowvar=False)
    mu_generated, sigma_generated = np.mean(generated_act, axis=0), np.cov(generated_act, rowvar=False)

    # Calculate FID score
    diff = mu_real - mu_generated
    covmean = scipy.linalg.sqrtm(sigma_real.dot(sigma_generated), disp=False)[0]

    # Numerical stability check
    if np.iscomplexobj(covmean):
        covmean = covmean.real

    fid_score = diff.dot(diff) + np.trace(sigma_real + sigma_generated - 2 * covmean)
    return fid_score

# Training function
def train_and_evaluate(train_dataset, epochs, noise_dim, batch_size, generator_fn, discriminator_fn, fid_batch_size=1000):
    generator = generator_fn(noise_dim=noise_dim)  # Pass noise_dim to generator
    discriminator = discriminator_fn()

    generator_optimizer = tf.keras.optimizers.Adam(1e-4)
    discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)

    for epoch in range(epochs):
        for real_images in train_dataset.batch(batch_size):  # Ensure correct batching
            noise = tf.random.normal([batch_size, noise_dim])

            with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
                generated_images = generator(noise, training=True)
                real_images = tf.reshape(real_images, (-1, 28, 28, 1))
                real_output = discriminator(real_images, training=True)
                fake_output = discriminator(generated_images, training=True)

                gen_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)(tf.ones_like(fake_output), fake_output)
                disc_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)(tf.ones_like(real_output), real_output) + \
                            tf.keras.losses.BinaryCrossentropy(from_logits=True)(tf.zeros_like(fake_output), fake_output)

            gradients_gen = gen_tape.gradient(gen_loss, generator.trainable_variables)
            gradients_disc = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

            generator_optimizer.apply_gradients(zip(gradients_gen, generator.trainable_variables))
            discriminator_optimizer.apply_gradients(zip(gradients_disc, discriminator.trainable_variables))

    # Calculate FID score
    fid_noise = tf.random.normal([fid_batch_size, noise_dim])
    generated_images = generator(fid_noise, training=False)

    # Collect real_images properly
    real_images = []
    for batch in train_dataset.take(fid_batch_size // batch_size):
        real_images.append(batch)
    real_images = tf.concat(real_images, axis=0)

    # Convert grayscale to RGB
    real_images_resized = tf.image.grayscale_to_rgb(real_images)
    real_images_resized = tf.image.resize(real_images_resized, (75, 75))

    # Prepare generated images
    generated_images_resized = tf.image.grayscale_to_rgb(generated_images)
    generated_images_resized = tf.image.resize(generated_images_resized, (75, 75))

    # Calculate FID score
    fid_score = calculate_fid(real_images_resized, generated_images_resized)
    return fid_score

# Parameter sweep
def parameter_sweep(train_dataset, generator_fn, discriminator_fn):
    results = []

    learning_rates = [1e-4, 2e-4]
    noise_dims = [100, 150]
    batch_sizes = [64, 128]

    for lr in learning_rates:
        for noise_dim in noise_dims:
            for batch_size in batch_sizes:
                dataset = train_dataset.batch(batch_size, drop_remainder=True)
                #print(f"Training with lr={lr}, noise_dim={noise_dim}, batch_size={batch_size}...")
                fid_score = train_and_evaluate(dataset, epochs=5, noise_dim=noise_dim, batch_size=batch_size,
                                               generator_fn=generator_fn, discriminator_fn=discriminator_fn)
                #print(f"FID score: {fid_score}")

                results.append((lr, noise_dim, batch_size, fid_score))

    # Sort results by FID in ascending order (lower is better) and display the top 5
    results = sorted(results, key=lambda x: x[3])[:5]
    print("\nTop 5 results:")
    for result in results:
        print(f"Learning Rate: {result[0]}, Noise Dim: {result[1]}, Batch Size: {result[2]}, FID Score: {result[3]}")

In [None]:
# Run parameter sweep function to see which hyper-parameter values returned the lowest FID score (lower score is better)
parameter_sweep(reduced_train_dataset, make_generator_model, make_discriminator_model)

"""
Results shared below (Originally run in Google Colab)
Top 5 results below:
Learning Rate: 0.0001, Noise Dim: 150, Batch Size: 64, FID Score: 942.9753430657199
Learning Rate: 0.0001, Noise Dim: 150, Batch Size: 128, FID Score: 954.4840038581231
Learning Rate: 0.0001, Noise Dim: 100, Batch Size: 64, FID Score: 1006.7283447952834
Learning Rate: 0.0002, Noise Dim: 150, Batch Size: 128, FID Score: 1010.5490313632524
Learning Rate: 0.0002, Noise Dim: 150, Batch Size: 64, FID Score: 1053.3903247668918
"""



In [14]:
"""
Attempt to improve model architecture with best parameters
"""

import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import scipy.linalg

# Generator model with enhanced architecture
def make_generator_model_v2(noise_dim=100):
    inputs = layers.Input(shape=(noise_dim,))
    x = layers.Dense(7 * 7 * 256, use_bias=False)(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.LeakyReLU()(x)
    x = layers.Reshape((7, 7, 256))(x)

    # Residual block 1
    skip1 = layers.Conv2DTranspose(128, (5, 5), strides=(1, 1), padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(skip1)
    x = layers.LeakyReLU()(x)

    # Residual block 2
    skip2 = layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(skip2)
    x = layers.LeakyReLU()(x)
    
    # Skip connection for residual learning
    x = layers.Add()([x, skip2])  # Adding skip connection

    # Output layer
    outputs = layers.Conv2DTranspose(1, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh')(x)
    
    return tf.keras.Model(inputs, outputs)

# Discriminator model with spectral normalization
def make_discriminator_model_v2():
    def spectral_norm(layer):
        # For Spectral Normalization, you can use the standard layer or add custom logic if required.
        # This placeholder just returns the layer itself for simplicity.
        return layer

    inputs = layers.Input(shape=(28, 28, 1))
    x = layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same')(inputs)
    x = spectral_norm(x)
    x = layers.LeakyReLU()(x)
    x = layers.Dropout(0.3)(x)

    x = layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same')(x)
    x = spectral_norm(x)
    x = layers.LeakyReLU()(x)
    x = layers.Dropout(0.3)(x)

    x = layers.Flatten()(x)
    outputs = layers.Dense(1)(x)  # No activation for logits
    
    return tf.keras.Model(inputs, outputs)

# FID score calculation
def calculate_fid(real_images, generated_images):
    if real_images.shape[-1] != 3:
        real_images = tf.image.grayscale_to_rgb(real_images)  # Convert grayscale to RGB
    if generated_images.shape[-1] != 3:
        generated_images = tf.image.grayscale_to_rgb(generated_images)  # Convert grayscale to RGB

    real_resized = tf.image.resize(real_images, (75, 75))
    generated_resized = tf.image.resize(generated_images, (75, 75))

    inception_model = tf.keras.applications.InceptionV3(include_top=False, pooling='avg', input_shape=(75, 75, 3))
    real_act = inception_model.predict(real_resized, verbose=0)
    generated_act = inception_model.predict(generated_resized, verbose=0)

    mu_real, sigma_real = np.mean(real_act, axis=0), np.cov(real_act, rowvar=False)
    mu_generated, sigma_generated = np.mean(generated_act, axis=0), np.cov(generated_act, rowvar=False)

    diff = mu_real - mu_generated
    covmean = scipy.linalg.sqrtm(sigma_real.dot(sigma_generated), disp=False)[0]

    if np.iscomplexobj(covmean):
        covmean = covmean.real

    fid_score = diff.dot(diff) + np.trace(sigma_real + sigma_generated - 2 * covmean)
    return fid_score

# Training function
def train_and_evaluate(train_dataset, epochs, noise_dim, batch_size, generator_fn, discriminator_fn, fid_batch_size=1000):
    generator = generator_fn(noise_dim=noise_dim)
    discriminator = discriminator_fn()

    generator_optimizer = tf.keras.optimizers.Adam(1e-4)
    discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)

    for epoch in range(epochs):
        for real_images in train_dataset.batch(batch_size):
            noise = tf.random.normal([batch_size, noise_dim])

            with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
                generated_images = generator(noise, training=True)
                real_images = tf.reshape(real_images, (-1, 28, 28, 1))
                real_output = discriminator(real_images, training=True)
                fake_output = discriminator(generated_images, training=True)

                gen_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)(tf.ones_like(fake_output), fake_output)
                disc_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)(tf.ones_like(real_output), real_output) + \
                            tf.keras.losses.BinaryCrossentropy(from_logits=True)(tf.zeros_like(fake_output), fake_output)

            gradients_gen = gen_tape.gradient(gen_loss, generator.trainable_variables)
            gradients_disc = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

            generator_optimizer.apply_gradients(zip(gradients_gen, generator.trainable_variables))
            discriminator_optimizer.apply_gradients(zip(gradients_disc, discriminator.trainable_variables))

    fid_noise = tf.random.normal([fid_batch_size, noise_dim])
    generated_images = generator(fid_noise, training=False)

    real_images = []
    for batch in train_dataset.take(fid_batch_size // batch_size):
        real_images.append(batch)
    real_images = tf.concat(real_images, axis=0)

    real_images_resized = tf.image.grayscale_to_rgb(real_images)
    real_images_resized = tf.image.resize(real_images_resized, (75, 75))
    generated_images_resized = tf.image.grayscale_to_rgb(generated_images)
    generated_images_resized = tf.image.resize(generated_images_resized, (75, 75))

    fid_score = calculate_fid(real_images_resized, generated_images_resized)
    return fid_score

# Parameter sweep
def parameter_sweep(train_dataset, generator_fn, discriminator_fn):
    results = []

    learning_rates = [1e-4]
    noise_dims = [150]
    batch_sizes = [64]

    for lr in learning_rates:
        for noise_dim in noise_dims:
            for batch_size in batch_sizes:
                dataset = train_dataset.batch(batch_size, drop_remainder=True)
                fid_score = train_and_evaluate(dataset, epochs=10, noise_dim=noise_dim, batch_size=batch_size,
                                               generator_fn=generator_fn, discriminator_fn=discriminator_fn)
                results.append((lr, noise_dim, batch_size, fid_score))

    print("\nResult:")
    for result in results:
        print(f"Learning Rate: {result[0]}, Noise Dim: {result[1]}, Batch Size: {result[2]}, FID Score: {result[3]}")

# Execute parameter sweep
parameter_sweep(reduced_train_dataset, make_generator_model_v2, make_discriminator_model_v2)


Result:
Learning Rate: 0.0001, Noise Dim: 150, Batch Size: 64, FID Score: 891.9216246410855
