In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
import numpy as np
import matplotlib.pyplot as plt
import tensorflow_model_optimization as tfmot
from tensorflow_model_optimization.sparsity.keras import UpdatePruningStep



In [2]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)

In [3]:
def create_cnn():
    model = models.Sequential([
        layers.Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        layers.MaxPooling2D(2,2),
        layers.Conv2D(64, (3,3), activation='relu'),
        layers.MaxPooling2D(2,2),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(10, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [4]:
def build_generator():
    noise = tf.keras.Input(shape=(100,))
    x = layers.Dense(128 * 7 * 7, activation='relu')(noise)
    x = layers.Reshape((7, 7, 128))(x)
    x = layers.UpSampling2D()(x)
    x = layers.Conv2D(128, kernel_size=3, padding='same', activation='relu')(x)
    x = layers.UpSampling2D()(x)
    x = layers.Conv2D(64, kernel_size=3, padding='same', activation='relu')(x)
    x = layers.Conv2D(1, kernel_size=3, padding='same', activation='tanh')(x)
    return tf.keras.models.Model(noise, x)

In [5]:
def build_discriminator():
    img = tf.keras.Input(shape=(28, 28, 1))
    
    # Encoder
    x = layers.Conv2D(64, kernel_size=3, strides=2, padding='same', activation='relu')(img)
    x = layers.Conv2D(128, kernel_size=3, strides=2, padding='same', activation='relu')(x)
    x = layers.Flatten()(x)
    encoded = layers.Dense(64, activation='relu')(x)

    # Decoder
    x = layers.Dense(7 * 7 * 128, activation='relu')(encoded)
    x = layers.Reshape((7, 7, 128))(x)
    x = layers.UpSampling2D()(x)
    x = layers.Conv2D(128, kernel_size=3, padding='same', activation='relu')(x)
    x = layers.UpSampling2D()(x)
    decoded = layers.Conv2D(1, kernel_size=3, padding='same', activation='tanh')(x)
    
    return tf.keras.models.Model(img, decoded)

In [8]:
def train_began(generator, discriminator, epochs=10000, batch_size=64, gamma=0.5, lambda_k=0.001):
    optimizer = tf.keras.optimizers.legacy.Adam(0.0002, 0.5)
    
    k = 0.0  # balance variable
    
    for epoch in range(epochs):
        # ---------------------
        #  Train Discriminator
        # ---------------------
        real_imgs = x_train[np.random.randint(0, x_train.shape[0], batch_size)]
        noise = np.random.normal(0, 1, (batch_size, 100))
        gen_imgs = generator.predict(noise)

        # Get reconstructions
        real_recon = discriminator(real_imgs)
        fake_recon = discriminator(gen_imgs)

        # Compute L1 loss (reconstruction loss)
        d_real_loss = tf.reduce_mean(tf.abs(real_imgs - real_recon))
        d_fake_loss = tf.reduce_mean(tf.abs(gen_imgs - fake_recon))

        d_loss = d_real_loss - k * d_fake_loss

        # Update discriminator
        with tf.GradientTape() as tape:
            real_recon = discriminator(real_imgs, training=True)
            fake_recon = discriminator(generator(noise), training=True)
            d_real_loss = tf.reduce_mean(tf.abs(real_imgs - real_recon))
            d_fake_loss = tf.reduce_mean(tf.abs(gen_imgs - fake_recon))
            d_loss = d_real_loss - k * d_fake_loss
        grads = tape.gradient(d_loss, discriminator.trainable_variables)
        optimizer.apply_gradients(zip(grads, discriminator.trainable_variables))

        # ---------------------
        #  Train Generator
        # ---------------------
        noise = np.random.normal(0, 1, (batch_size, 100))
        with tf.GradientTape() as tape:
            gen_imgs = generator(noise, training=True)
            fake_recon = discriminator(gen_imgs, training=True)
            g_loss = tf.reduce_mean(tf.abs(gen_imgs - fake_recon))
        grads = tape.gradient(g_loss, generator.trainable_variables)
        optimizer.apply_gradients(zip(grads, generator.trainable_variables))

        # Update balance variable
        k = k + lambda_k * (gamma * d_real_loss - d_fake_loss)
        k = np.clip(k, 0, 1)

        # Measure convergence
        M = d_real_loss + tf.abs(gamma * d_real_loss - d_fake_loss)

        if epoch % 1000 == 0:
            print(f"Epoch {epoch}/{epochs} | D loss: {d_loss:.4f} | G loss: {g_loss:.4f} | M: {M:.4f} | k: {k:.4f}")

In [9]:
# Initialize the generator and discriminator
generator = build_generator()
discriminator = build_discriminator()

# Train the DCGAN
train_began(generator, discriminator, epochs=10000, batch_size=64)

Epoch 0/10000 | D loss: 0.1387 | G loss: 0.0445 | M: 0.1638 | k: 0.0000
Epoch 1000/10000 | D loss: 0.0331 | G loss: 0.0144 | M: 0.0358 | k: 0.0001
Epoch 2000/10000 | D loss: 0.0249 | G loss: 0.0219 | M: 0.0348 | k: 0.0003
Epoch 3000/10000 | D loss: 0.0249 | G loss: 0.0063 | M: 0.0304 | k: 0.0033
Epoch 4000/10000 | D loss: 0.0242 | G loss: 0.0060 | M: 0.0316 | k: 0.0073
Epoch 5000/10000 | D loss: 0.0204 | G loss: 0.0240 | M: 0.0340 | k: 0.0080
Epoch 6000/10000 | D loss: 0.0195 | G loss: 0.0095 | M: 0.0200 | k: 0.0096
Epoch 7000/10000 | D loss: 0.0203 | G loss: 0.0065 | M: 0.0249 | k: 0.0099
Epoch 8000/10000 | D loss: 0.0175 | G loss: 0.0060 | M: 0.0207 | k: 0.0138
Epoch 9000/10000 | D loss: 0.0184 | G loss: 0.0046 | M: 0.0235 | k: 0.0186


In [10]:
def generate_images(generator, num_samples=10000):
    noise = np.random.normal(0, 1, (num_samples, 100))
    generated_images = generator.predict(noise)
    
    # Concatenate with real training data
    z_train_aug = np.concatenate([x_train, generated_images])
    y_train_aug = np.concatenate([y_train, np.random.randint(0, 10, num_samples)])
    
    return z_train_aug, y_train_aug

In [11]:
# Train CNN on original data
cnn = create_cnn()
history_orig = cnn.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [12]:
# Train CNN on augmented data
# Generate new images after training and augment the training data
z_train_aug, y_train_aug = generate_images(generator, num_samples=10000)
cnn_aug = create_cnn()
history_aug = cnn_aug.fit(z_train_aug, y_train_aug, epochs=5, validation_data=(x_test, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
pruning_params = {
    'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=0.0, final_sparsity=0.5, 
        begin_step=0, end_step=np.ceil(len(x_train) / 32).astype(np.int32) * 5)
}

# Apply pruning to the model
pruned_model = tfmot.sparsity.keras.prune_low_magnitude(cnn, **pruning_params)

# Unfreeze some layers (for fine-tuning) if necessary
for layer in pruned_model.layers[-4:]:  # Example: Unfreeze the last 4 layers
    layer.trainable = True

# Recompile the model after unfreezing layers
pruned_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Create the pruning callback
pruning_callback = UpdatePruningStep()

# Fine-tune the model
history_pruned_aug = pruned_model.fit(z_train_aug, y_train_aug, epochs=5, validation_data=(x_test, y_test), callbacks=[pruning_callback])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
 316/2188 [===>..........................] - ETA: 24s - loss: 0.3591 - accuracy: 0.8652

In [None]:
plt.figure(figsize=(12, 6))

# Plot validation accuracy for each model
plt.plot(history_orig.history['val_accuracy'], label='Original')
plt.plot(history_aug.history['val_accuracy'], label='Model with DCGAN Augmentation')
plt.plot(history_pruned_aug.history['val_accuracy'], label='Pruned with DCGAN Augmentation')

# Labeling the axes and adding a title
plt.xlabel('Epochs')
plt.ylabel('Validation Accuracy')
plt.title('Comparison of Validation Accuracy Across Models')

# Adding a legend to differentiate the lines
plt.legend()

# Display the plot
plt.show()