In [1]:
# Install the watermark package.
# This package is used to record the versions of other packages used in this Jupyter notebook.
# https://github.com/rasbt/watermark
!pip install -q -U watermark

In [2]:
import os                         # Used for operating system related functionalities
import time  # Provides time-related functions for tracking training and testing time
import torch                      # The main PyTorch library
import torch.nn as nn             # Provides various neural network layers and functions
import torch.optim as optim       # Contains different optimization algorithms
import torchvision.datasets as datasets  # Provides access to popular datasets
import torchvision.transforms as transforms  # Offers various image transformations
from torch.utils.data import DataLoader    # Helps with efficient data loading
from torchvision.utils import save_image   # Used for saving generated images

In [3]:
# Load (or reload, if it is already loaded) the watermark IPython extension so that
# the %watermark magic used below is available.
%reload_ext watermark

# Print a provenance header for this notebook:
#   -a          author name
#   -ws         author website
#   --python    Python implementation, Python version and IPython version
#   --iversions versions of the packages that have been imported in this session
%watermark -a 'Fabiano Falcão' -ws "https://fabianumfalco.github.io/" --python --iversions

Author: Fabiano Falcão

Website: https://fabianumfalco.github.io/

Python implementation: CPython
Python version       : 3.10.6
IPython version      : 8.11.0

torch      : 2.0.0
torchvision: 0.15.1



In [4]:
# Define the Discriminator
class Discriminator(nn.Module):
    """Fully connected discriminator for flattened images.

    The network is a stack of three linear layers
    (image_size -> 512 -> 256 -> 1). The two hidden layers are followed by
    LeakyReLU with a negative slope of 0.2, and the last layer by a sigmoid,
    so every sample is mapped to a single value in [0, 1] that is interpreted
    as the probability of the input being real rather than generated.

    Args:
        image_size: Number of features of one flattened input image.
    """

    def __init__(self, image_size):
        super().__init__()

        # Widths of the two hidden layers, in order.
        hidden_widths = (512, 256)

        stack = []
        fan_in = image_size
        for fan_out in hidden_widths:
            stack.append(nn.Linear(fan_in, fan_out))   # affine projection
            stack.append(nn.LeakyReLU(0.2))            # leaky activation, slope 0.2 for x < 0
            fan_in = fan_out

        stack.append(nn.Linear(fan_in, 1))             # one score per sample
        stack.append(nn.Sigmoid())                     # squash the score into [0, 1]

        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the sigmoid output."""
        return self.model(x)



In [5]:
# Define the Generator
class Generator(nn.Module):
    """Fully connected generator mapping latent vectors to flattened images.

    The network is a stack of three linear layers
    (latent_size -> 256 -> 512 -> image_size). The two hidden layers are
    followed by LeakyReLU with a negative slope of 0.2, and the output layer
    by Tanh, so every generated pixel value lies in [-1, 1].

    Args:
        latent_size: Number of features of one latent (noise) vector.
        image_size: Number of features of one flattened output image.
    """

    def __init__(self, latent_size, image_size):
        super().__init__()

        # Input width followed by the widths of the two hidden layers.
        widths = [latent_size, 256, 512]

        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))    # affine projection
            stack.append(nn.LeakyReLU(0.2))             # leaky activation, slope 0.2 for x < 0

        stack.append(nn.Linear(widths[-1], image_size))  # project to image space
        stack.append(nn.Tanh())                          # squash the output into [-1, 1]

        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run the latent batch ``x`` through the layer stack and return the result."""
        return self.model(x)

In [6]:
# Set device
# Use the GPU when PyTorch reports CUDA as available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Hyperparameters

# Dimensionality of the random noise vector that is fed to the generator.
latent_size = 100

# Length of one flattened input image: 28 * 28 pixels, the size of the MNIST images.
image_size = 28 * 28

# Number of samples processed in each mini-batch during training.
# It affects the speed and memory requirements of the training process.
batch_size = 128

# Number of complete passes over the training dataset.
epochs = 100

Using device: cuda


In [7]:
# Load the MNIST dataset

# Preprocessing applied to every image: convert it to a tensor, then normalize
# with mean 0.5 and std 0.5 so that pixel values end up in the range [-1, 1].
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# Training split of MNIST, stored under ./data/mnist_data/ and downloaded
# on first use if it is not already there.
train_dataset = datasets.MNIST(
    root='./data/mnist_data/',
    train=True,
    transform=transform,
    download=True,
)

# Mini-batch iterator over the training set; the data is reshuffled at the
# beginning of each epoch.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)


In [8]:
# Initialize the discriminator and generator

# Discriminator: scores flattened images; placed on the selected device (GPU or CPU).
discriminator = Discriminator(image_size=image_size).to(device=device)

# Generator: turns latent vectors into flattened images; placed on the same device.
generator = Generator(latent_size=latent_size, image_size=image_size).to(device=device)

In [9]:
# Loss function and optimizers

# Binary cross-entropy on the discriminator's sigmoid output; the same criterion
# is used for the discriminator and the generator updates.
criterion = nn.BCELoss()

# One Adam optimizer per network, both with a learning rate of 2e-4.
discriminator_optimizer = optim.Adam(params=discriminator.parameters(), lr=2e-4)
generator_optimizer = optim.Adam(params=generator.parameters(), lr=2e-4)


In [None]:
# Make sure the output directory for the generated sample grids exists.
# exist_ok=True turns this into a no-op when the directory is already there.
os.makedirs('./data/gan/', exist_ok=True)

start_time = time.time()  # Record the start time of training

# Training loop: for every mini-batch, take one discriminator step followed by
# one generator step; after every epoch, save a 4x4 grid of generated samples.
for epoch in range(epochs):
    for batch_idx, (real_images, _) in enumerate(train_loader):
        # Flatten every image to a vector of length image_size and move the batch to the device
        real_images = real_images.view(-1, image_size).to(device)

        # Size of THIS mini-batch (the last batch of an epoch can be smaller).
        # It gets its own name so that the global `batch_size` hyperparameter is
        # not overwritten, which would silently change the DataLoader
        # configuration if that cell were re-run after training.
        current_batch_size = real_images.shape[0]

        # Train Discriminator
        discriminator.zero_grad()
        # Targets are created directly on the device (no extra host-to-device copy)
        real_labels = torch.ones(current_batch_size, 1, device=device)
        fake_labels = torch.zeros(current_batch_size, 1, device=device)

        # Real images: the discriminator should output 1
        real_outputs = discriminator(real_images)
        discriminator_real_loss = criterion(real_outputs, real_labels)
        discriminator_real_loss.backward()

        # Fake images: the discriminator should output 0.
        # detach() keeps this backward pass from reaching the generator.
        noise = torch.randn(current_batch_size, latent_size, device=device)
        fake_images = generator(noise)
        fake_outputs = discriminator(fake_images.detach())
        discriminator_fake_loss = criterion(fake_outputs, fake_labels)
        discriminator_fake_loss.backward()

        # Sum of both parts, used for logging only (gradients were already accumulated above)
        discriminator_loss = discriminator_real_loss + discriminator_fake_loss
        discriminator_optimizer.step()

        # Train Generator
        # The same fake batch is scored again, this time without detach(), and
        # with the "real" labels as target, so the gradient flows back into the generator.
        generator.zero_grad()
        fake_outputs = discriminator(fake_images)
        generator_loss = criterion(fake_outputs, real_labels)
        generator_loss.backward()
        generator_optimizer.step()

        # Print losses
        if batch_idx % 100 == 0:
            print(f"Epoch [{epoch}/{epochs}], Batch [{batch_idx}/{len(train_loader)}], "
                  f"Discriminator Loss: {discriminator_loss.item():.4f}, "
                  f"Generator Loss: {generator_loss.item():.4f}")

    # Save generated images: 16 fresh samples, reshaped to 1x28x28 and written as a 4x4 grid
    with torch.no_grad():
        noise = torch.randn(16, latent_size, device=device)
        generated_images = generator(noise).reshape(-1, 1, 28, 28)
        save_image(generated_images, f"./data/gan/gan_generated_images_epoch{epoch + 1}.png", nrow=4, normalize=True)

end_time = time.time()  # Record the end time of training
training_time = end_time - start_time  # Calculate the total training time

# Convert the training duration to the format hh:mm:ss
hours = int(training_time // 3600)
minutes = int((training_time % 3600) // 60)
seconds = int(training_time % 60)

print('Finished Training')
print(f"\nTraining Time: {hours:02d}:{minutes:02d}:{seconds:02d}")

Epoch [0/100], Batch [0/469], Discriminator Loss: 1.4366, Generator Loss: 0.6688
Epoch [0/100], Batch [100/469], Discriminator Loss: 0.5819, Generator Loss: 1.6277
Epoch [0/100], Batch [200/469], Discriminator Loss: 0.6051, Generator Loss: 1.9980
Epoch [0/100], Batch [300/469], Discriminator Loss: 0.4360, Generator Loss: 4.1471
Epoch [0/100], Batch [400/469], Discriminator Loss: 0.0756, Generator Loss: 8.4709
Epoch [1/100], Batch [0/469], Discriminator Loss: 0.1129, Generator Loss: 6.5763
Epoch [1/100], Batch [100/469], Discriminator Loss: 0.2815, Generator Loss: 3.1278
Epoch [1/100], Batch [200/469], Discriminator Loss: 0.3459, Generator Loss: 5.1569
Epoch [1/100], Batch [300/469], Discriminator Loss: 0.6497, Generator Loss: 2.8659
Epoch [1/100], Batch [400/469], Discriminator Loss: 0.0582, Generator Loss: 5.0110
Epoch [2/100], Batch [0/469], Discriminator Loss: 1.0731, Generator Loss: 2.3919
Epoch [2/100], Batch [100/469], Discriminator Loss: 0.3548, Generator Loss: 5.0652
Epoch [2/1

Epoch [19/100], Batch [400/469], Discriminator Loss: 0.5578, Generator Loss: 3.9104
Epoch [20/100], Batch [0/469], Discriminator Loss: 0.3196, Generator Loss: 2.8157
Epoch [20/100], Batch [100/469], Discriminator Loss: 0.3733, Generator Loss: 2.8100
Epoch [20/100], Batch [200/469], Discriminator Loss: 0.6121, Generator Loss: 3.4792
Epoch [20/100], Batch [300/469], Discriminator Loss: 0.7721, Generator Loss: 3.5488
Epoch [20/100], Batch [400/469], Discriminator Loss: 0.4941, Generator Loss: 2.8724
Epoch [21/100], Batch [0/469], Discriminator Loss: 0.5055, Generator Loss: 2.5486
Epoch [21/100], Batch [100/469], Discriminator Loss: 0.6846, Generator Loss: 2.3681
Epoch [21/100], Batch [200/469], Discriminator Loss: 0.5068, Generator Loss: 2.9213
Epoch [21/100], Batch [300/469], Discriminator Loss: 0.7492, Generator Loss: 2.5819
Epoch [21/100], Batch [400/469], Discriminator Loss: 0.4387, Generator Loss: 3.3490
Epoch [22/100], Batch [0/469], Discriminator Loss: 0.6968, Generator Loss: 1.476