In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Conv2D, LeakyReLU, Reshape, Conv2DTranspose, GlobalMaxPooling2D
from tensorflow.keras import Input, Sequential, Model
from tensorflow.keras.metrics import Mean
import numpy as np
import os

2023-12-25 15:24:49.844521: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-25 15:24:49.844620: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-25 15:24:49.847206: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-25 15:24:49.860634: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Create the discriminator
discriminator = Sequential(
    [
        Input(shape=(28, 28, 1)),
        Conv2D(64, (3, 3), strides=(2, 2), padding="same"),
        LeakyReLU(alpha=0.2),
        Conv2D(128, (3, 3), strides=(2, 2), padding="same"),
        LeakyReLU(alpha=0.2),
        GlobalMaxPooling2D(),
        Dense(1),
    ],
    name="discriminator",
)

# Create the generator
latent_dim = 128
generator = Sequential(
    [
        Input(shape=(latent_dim,)),
        # We want to generate 128 coefficients to reshape into a 7x7x128 map
        Dense(7 * 7 * 128),
        LeakyReLU(alpha=0.2),
        Reshape((7, 7, 128)),
        Conv2DTranspose(128, (4, 4), strides=(2, 2), padding="same"),
        LeakyReLU(alpha=0.2),
        Conv2DTranspose(128, (4, 4), strides=(2, 2), padding="same"),
        LeakyReLU(alpha=0.2),
        Conv2D(1, (7, 7), padding="same", activation="sigmoid"),
    ],
    name="generator",
)
discriminator.summary()
generator.summary()

2023-12-25 15:24:52.950381: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:2d:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-25 15:24:53.015579: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:2d:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-25 15:24:53.015641: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:2d:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-25 15:24:53.020364: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:2d:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-25 15:24:53.020629: I external/local_xla/xla/stream_executor

Model: "discriminator"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 14, 14, 64)        640       
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 14, 14, 64)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 7, 7, 128)         73856     
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 7, 7, 128)         0         
                                                                 
 global_max_pooling2d (Glob  (None, 128)               0         
 alMaxPooling2D)                                                 
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                     

In [3]:
class GAN(Model):
    def __init__(self, discriminator, generator, latent_dim):
        super().__init__()
        self.discriminator = discriminator
        self.generator = generator
        self.latent_dim = latent_dim
        self.d_loss_tracker = Mean(name="d_loss")
        self.g_loss_tracker = Mean(name="g_loss")

    def compile(self, d_optimizer, g_optimizer, loss_fn):
        super().compile()
        self.d_optimizer = d_optimizer
        self.g_optimizer = g_optimizer
        self.loss_fn = loss_fn

    def train_step(self, real_images):
        if isinstance(real_images, tuple):
            real_images = real_images[0]
        # Sample random points in the latent space
        batch_size = tf.shape(real_images)[0]
        random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim))

        # Decode them to fake images
        generated_images = self.generator(random_latent_vectors)

        # Combine them with real images
        combined_images = tf.concat([generated_images, real_images], axis=0)

        # Assemble labels discriminating real from fake images
        labels = tf.concat(
            [tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0
        )
        # Add random noise to the labels - important trick!
        labels += 0.05 * tf.random.uniform(tf.shape(labels))

        # Train the discriminator
        with tf.GradientTape() as tape:
            predictions = self.discriminator(combined_images)
            d_loss = self.loss_fn(labels, predictions)
        grads = tape.gradient(d_loss, self.discriminator.trainable_weights)
        self.d_optimizer.apply_gradients(
            zip(grads, self.discriminator.trainable_weights)
        )

        # Sample random points in the latent space
        random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim))

        # Assemble labels that say "all real images"
        misleading_labels = tf.zeros((batch_size, 1))

        # Train the generator (note that we should *not* update the weights
        # of the discriminator)!
        with tf.GradientTape() as tape:
            predictions = self.discriminator(self.generator(random_latent_vectors))
            g_loss = self.loss_fn(misleading_labels, predictions)
        grads = tape.gradient(g_loss, self.generator.trainable_weights)
        self.g_optimizer.apply_gradients(zip(grads, self.generator.trainable_weights))

        # Update metrics and return their value.
        self.d_loss_tracker.update_state(d_loss)
        self.g_loss_tracker.update_state(g_loss)
        return {
            "d_loss": self.d_loss_tracker.result(),
            "g_loss": self.g_loss_tracker.result(),
            "images": generated_images
        }

In [4]:
# Load the TensorBoard notebook extension
%load_ext tensorboard
import datetime
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
os.makedirs(log_dir, exist_ok=True)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
file_writer = tf.summary.create_file_writer(log_dir)

In [5]:
# Prepare the dataset. We use both the training & test MNIST digits.
batch_size = 64
(x_train, _), (x_test, _) = tf.keras.datasets.mnist.load_data()
all_digits = np.concatenate([x_train, x_test])
all_digits = all_digits.astype("float32") / 255.0
all_digits = np.reshape(all_digits, (-1, 28, 28, 1))
dataset = tf.data.Dataset.from_tensor_slices(all_digits)
dataset = dataset.shuffle(buffer_size=1024).batch(batch_size)

gan = GAN(discriminator=discriminator, generator=generator, latent_dim=latent_dim)
gan.compile(
    d_optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003),
    g_optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003),
    loss_fn=tf.keras.losses.BinaryCrossentropy(from_logits=True),
)

# gan.fit(dataset, epochs=20, callbacks=[tensorboard_callback])

epochs = 20  # In practice you need at least 20 epochs to generate nice digits.

for epoch in range(epochs):
    print("\nStart epoch", epoch)

    for step, real_images in enumerate(dataset):
        # Train the discriminator & generator on one batch of real images.
        out = gan.train_step(real_images)
        
        d_loss = out["d_loss"]
        g_loss = out["g_loss"]
        generated_images = out["images"]

        # Logging.
        if step % 200 == 0:
            # Print metrics
            print("discriminator loss at step %d: %.2f" % (step, d_loss))
            print("adversarial loss at step %d: %.2f" % (step, g_loss))

            with file_writer.as_default():
                tf.summary.image("Generated images", generated_images, max_outputs=25, step=step+len(dataset)*epoch)
            
            # Save one generated image
            img = tf.keras.utils.array_to_img(generated_images[0] * 255.0, scale=False)
            img.save(os.path.join(log_dir, "generated_img" + str(step) + ".png"))



Start epoch 0


2023-12-25 15:25:13.344084: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8904
2023-12-25 15:25:13.469758: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-12-25 15:25:14.642324: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-12-25 15:25:15.814444: I external/local_xla/xla/service/service.cc:168] XLA service 0x10f4fee0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-12-25 15:25:15.814528: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce GTX 1050 Ti, Compute Capability 6.1
2023-12-25 15:25:15.824302: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1703535915.933080  319103 device_compiler.h

discriminator loss at step 0: 0.70
adversarial loss at step 0: 0.70
discriminator loss at step 200: 0.33
adversarial loss at step 200: 1.36
discriminator loss at step 400: 0.30
adversarial loss at step 400: 1.96
discriminator loss at step 600: 0.38
adversarial loss at step 600: 1.77
discriminator loss at step 800: 0.41
adversarial loss at step 800: 1.69
discriminator loss at step 1000: 0.41
adversarial loss at step 1000: 1.71

Start epoch 1
discriminator loss at step 0: 0.40
adversarial loss at step 0: 1.74
discriminator loss at step 200: 0.39
adversarial loss at step 200: 1.77
discriminator loss at step 400: 0.38
adversarial loss at step 400: 1.81
discriminator loss at step 600: 0.36
adversarial loss at step 600: 1.87
discriminator loss at step 800: 0.35
adversarial loss at step 800: 1.95
discriminator loss at step 1000: 0.31
adversarial loss at step 1000: 2.67

Start epoch 2
discriminator loss at step 0: 0.29
adversarial loss at step 0: 3.26
discriminator loss at step 200: 0.28
adver

2023-12-25 15:33:58.450606: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 266.25MiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2023-12-25 15:33:58.542028: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 266.25MiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2023-12-25 15:33:58.639439: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 266.25MiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2023-12-25 15:33:58.733650: W external/local_tsl/tsl/framewor

discriminator loss at step 400: 0.45
adversarial loss at step 400: 2.25
discriminator loss at step 600: 0.45
adversarial loss at step 600: 2.21
discriminator loss at step 800: 0.46
adversarial loss at step 800: 2.17
discriminator loss at step 1000: 0.46
adversarial loss at step 1000: 2.14


2023-12-25 15:35:20.519701: W external/local_tsl/tsl/framework/bfc_allocator.cc:485] Allocator (GPU_0_bfc) ran out of memory trying to allocate 24.50MiB (rounded to 25690112)requested by op Conv2DBackpropInput
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2023-12-25 15:35:20.519990: I external/local_tsl/tsl/framework/bfc_allocator.cc:1039] BFCAllocator dump for GPU_0_bfc
2023-12-25 15:35:20.520006: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (256): 	Total Chunks: 56, Chunks in use: 56. 14.0KiB allocated for chunks. 14.0KiB in use in bin. 2.0KiB client-requested in use in bin.
2023-12-25 15:35:20.520011: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (512): 	Total Chunks: 18, Chunks in use: 17. 9.5KiB allocated for chunks. 8.8KiB in use in bin. 8.5KiB client-requested in use in bin.
2023-12-25 15:

ResourceExhaustedError: {{function_node __wrapped__Conv2DBackpropInput_device_/job:localhost/replica:0/task:0/device:GPU:0}} OOM when allocating tensor with shape[64,128,28,28] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:Conv2DBackpropInput] name: 