In [2]:
# Import the required libraries
# For this example we use PyTorch to build and train the neural networks.
# torchvision is a companion package to PyTorch that provides vision datasets; it is where we source the MNIST (handwritten digits) data.

from __future__ import print_function
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils



In [6]:
# Draw a fresh seed between 1 and 10000 on every run and print it, then use it
# to seed both Python's `random` module and PyTorch. To replay a run, assign the
# printed value to manualSeed instead of drawing a new one.
manualSeed = random.randrange(1, 10001)
print("Random Seed: ", manualSeed)
for seed_fn in (random.seed,):
    seed_fn(manualSeed)
torch.manual_seed(manualSeed)


Random Seed:  9225


<torch._C.Generator at 0x22f5552eb90>

In [8]:
# Check whether PyTorch can see a CUDA-capable GPU. As the last expression in the cell, the boolean result is shown as the cell output.
torch.cuda.is_available()


False

In [10]:
# Device selection and the hyperparameters shared by the rest of the notebook.
# Use the first CUDA GPU when PyTorch can see one and fall back to the CPU
# otherwise, so the notebook runs unchanged on either kind of machine.
use_cuda = torch.cuda.is_available()
cudnn.benchmark = True
device = torch.device("cuda:0" if use_cuda else "cpu")
# Number of GPUs the networks may use. The models only switch to data-parallel
# execution when this is greater than 1, so raise it by hand for a multi-GPU run.
ngpu = 1 if use_cuda else 0
# Number of channels in the latent noise tensor fed to the generator
nz = 100
# Base number of feature maps in the generator (its layer widths are multiples of this)
ngf = 64
# Base number of feature maps in the discriminator (its layer widths are multiples of this)
ndf = 64
# Number of image channels: 1 for greyscale MNIST; colour datasets would use 3
nc = 1
# Number of samples per mini-batch
batch_size = 64
# Number of worker subprocesses the DataLoader uses to load the dataset
workers = 4

In [12]:
# Pre-processing applied to every MNIST image, in order:
#   1. resize to 64x64 pixels,
#   2. convert to a PyTorch tensor,
#   3. normalise the single channel with mean 0.5 and standard deviation 0.5.
mnist_transform = transforms.Compose([
    transforms.Resize(64),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# MNIST handwritten digits, stored under ./data and downloaded there if not already present.
dataset = dset.MNIST(root='data', download=True, transform=mnist_transform)

# Batched loader over the dataset: `batch_size` images per batch, reshuffled at
# every epoch, read in parallel by `workers` subprocesses.
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=int(workers),
)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to data\MNIST\raw\train-images-idx3-ubyte.gz


100%|████████████████████████████████████████████████████████████████████| 9912422/9912422 [00:11<00:00, 835198.79it/s]


Extracting data\MNIST\raw\train-images-idx3-ubyte.gz to data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|████████████████████████████████████████████████████████████████████████| 28881/28881 [00:00<00:00, 261594.88it/s]


Extracting data\MNIST\raw\train-labels-idx1-ubyte.gz to data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|████████████████████████████████████████████████████████████████████| 1648877/1648877 [00:03<00:00, 512912.37it/s]


Extracting data\MNIST\raw\t10k-images-idx3-ubyte.gz to data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████████████████████████████████████████████████████████████████████████████████| 4542/4542 [00:00<?, ?it/s]

Extracting data\MNIST\raw\t10k-labels-idx1-ubyte.gz to data\MNIST\raw






In [14]:


# Custom weight initialisation, applied to netG and netD via ``.apply(weights_init)``.
def weights_init(m):
    """Initialise a single module in place, chosen by its class name.

    * Class names containing ``Conv``: weights drawn from a normal
      distribution with mean 0.0 and std 0.02.
    * Class names containing ``BatchNorm``: weights drawn from a normal
      distribution with mean 1.0 and std 0.02, biases set to zero.
    * Anything else is left untouched.

    A module that matches by name but has no parameter to initialise (for
    example a container class called ``ConvBlock``, or a batch-norm layer built
    with ``affine=False``) is skipped instead of raising.

    Args:
        m: The module to initialise; ``nn.Module.apply`` passes each submodule in turn.
    """
    classname = m.__class__.__name__
    # getattr with a default covers both "attribute missing" and "attribute is None".
    weight = getattr(m, 'weight', None)
    bias = getattr(m, 'bias', None)

    if 'Conv' in classname:
        if weight is not None:
            torch.nn.init.normal_(weight, 0.0, 0.02)
    elif 'BatchNorm' in classname:
        if weight is not None:
            torch.nn.init.normal_(weight, 1.0, 0.02)
        if bias is not None:
            torch.nn.init.zeros_(bias)


In [16]:
# This is the bulk of the neural network definition for the Generator.
# The init sets up the layers and connecting activation functions.
# The forward function processes the data through the layers
class Generator(nn.Module):
    """Transposed-convolution generator: latent tensor (nz x 1 x 1) -> image (nc x 64 x 64).

    The layer sizes come from the module-level settings ``nz`` (latent
    channels), ``ngf`` (base feature-map count) and ``nc`` (output channels),
    read when an instance is constructed.
    """

    def __init__(self, ngpu):
        """Assemble the up-sampling stack.

        Args:
            ngpu: Number of GPUs a forward pass may be spread over; only
                values greater than 1 enable data-parallel execution.
        """
        super().__init__()
        self.ngpu = ngpu

        # One row per hidden stage: (in_channels, out_channels, stride, padding).
        # Every stage is a 4x4 transposed convolution without bias, followed by
        # batch normalisation and an in-place ReLU.
        hidden_stages = (
            (nz,      ngf * 8, 1, 0),  # Z            -> (ngf*8) x 4 x 4
            (ngf * 8, ngf * 4, 2, 1),  #              -> (ngf*4) x 8 x 8
            (ngf * 4, ngf * 2, 2, 1),  #              -> (ngf*2) x 16 x 16
            (ngf * 2, ngf,     2, 1),  #              -> (ngf) x 32 x 32
        )

        layers = []
        for in_ch, out_ch, stride, pad in hidden_stages:
            layers.append(nn.ConvTranspose2d(in_ch, out_ch, 4, stride, pad, bias=False))
            layers.append(nn.BatchNorm2d(out_ch))
            layers.append(nn.ReLU(True))

        # Output stage: up-sample to (nc) x 64 x 64 and squash values into [-1, 1].
        layers.append(nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False))
        layers.append(nn.Tanh())

        self.main = nn.Sequential(*layers)

    def forward(self, input):
        """Run ``input`` through the layer stack and return the result.

        The batch is split across ``range(self.ngpu)`` devices only when the
        input lives on a CUDA device and more than one GPU was requested.
        """
        spread_over_gpus = input.is_cuda and self.ngpu > 1
        if not spread_over_gpus:
            return self.main(input)
        return nn.parallel.data_parallel(self.main, input, range(self.ngpu))


# Create the generator, move it to the selected device, re-initialise its
# weights with weights_init, and print the layer summary.
netG = Generator(ngpu)
netG = netG.to(device)
netG.apply(weights_init)
print(netG)


Generator(
  (main): Sequential(
    (0): ConvTranspose2d(100, 512, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): ConvTranspose2d(512, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (7): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU(inplace=True)
    (9): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU(inplace=True)
    (12): ConvTranspose2d(64, 1, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (13): Tanh()
  )
)


In [18]:
# This is the bulk of the neural network definition for the Discriminator.
# The init sets up the layers and connecting activation functions.
# The forward function processes the data through the layers
class Discriminator(nn.Module):
    """Convolutional discriminator: image (nc x 64 x 64) -> one sigmoid score per sample.

    The layer sizes come from the module-level settings ``nc`` (input
    channels) and ``ndf`` (base feature-map count), read when an instance is
    constructed.
    """

    def __init__(self, ngpu):
        """Assemble the down-sampling stack.

        Args:
            ngpu: Number of GPUs a forward pass may be spread over; only
                values greater than 1 enable data-parallel execution.
        """
        super().__init__()
        self.ngpu = ngpu

        def halve(in_ch, out_ch):
            # 4x4 convolution, stride 2, padding 1, no bias: halves height and width.
            return nn.Conv2d(in_ch, out_ch, 4, 2, 1, bias=False)

        # Input stage (no batch norm): (nc) x 64 x 64 -> (ndf) x 32 x 32
        layers = [halve(nc, ndf), nn.LeakyReLU(0.2, inplace=True)]

        # Three stages that each double the channel count:
        # (ndf*2) x 16 x 16, then (ndf*4) x 8 x 8, then (ndf*8) x 4 x 4
        for mult in (1, 2, 4):
            layers += [
                halve(ndf * mult, ndf * mult * 2),
                nn.BatchNorm2d(ndf * mult * 2),
                nn.LeakyReLU(0.2, inplace=True),
            ]

        # Output stage: collapse the 4x4 map to a single value and squash it with a sigmoid.
        layers += [nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False), nn.Sigmoid()]

        self.main = nn.Sequential(*layers)

    def forward(self, input):
        """Score ``input`` and return the scores flattened to one dimension.

        The batch is split across ``range(self.ngpu)`` devices only when the
        input lives on a CUDA device and more than one GPU was requested.
        """
        if input.is_cuda and self.ngpu > 1:
            scores = nn.parallel.data_parallel(self.main, input, range(self.ngpu))
        else:
            scores = self.main(input)
        # Reshape to (-1, 1), then drop the trailing axis.
        flat = scores.view(-1, 1)
        return flat.squeeze(1)
    
# Create the discriminator, move it to the selected device, re-initialise its
# weights with weights_init, and print the layer summary.
netD = Discriminator(ngpu)
netD = netD.to(device)
netD.apply(weights_init)
print(netD)

Discriminator(
  (main): Sequential(
    (0): Conv2d(1, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): LeakyReLU(negative_slope=0.2, inplace=True)
    (5): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (6): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): LeakyReLU(negative_slope=0.2, inplace=True)
    (8): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (9): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): LeakyReLU(negative_slope=0.2, inplace=True)
    (11): Conv2d(512, 1, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (12): Sigmoid()
  )
)


In [22]:
# Loss function: binary cross-entropy between the discriminator's output and
# the target label (real or fake).
criterion = nn.BCELoss()

# Target labels: 1 marks a real sample, 0 marks a generated (fake) one.
real_label = 1
fake_label = 0

# A fixed set of 64 latent samples, shape (64, nz, 1, 1), drawn once from a
# standard normal distribution and created on the selected device.
fixed_noise = torch.randn(64, nz, 1, 1, device=device)

# One Adam optimiser per network; both use the same learning rate and betas.
adam_settings = dict(lr=0.0005, betas=(0.5, 0.999))
optimizerD = optim.Adam(netD.parameters(), **adam_settings)
optimizerG = optim.Adam(netG.parameters(), **adam_settings)


In [None]:
# This is the engine of the code base: it connects the generator, the
# discriminator and the dataset so that the two networks learn against each other.
# For every mini-batch the discriminator is updated first (on a real batch and
# on a generated batch), then the generator is updated through the
# discriminator's judgement of that same generated batch.

# Number of passes over the dataset; used for both the loop bound and the progress message.
num_epochs = 1

for epoch in range(num_epochs):
    for i, data in enumerate(dataloader, 0):
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        # --- train with real ---
        # Clear the discriminator's gradients so nothing accumulates from the previous batch.
        netD.zero_grad()
        # data[0] holds the images of the batch; move them to the training device.
        real_cpu = data[0].to(device)
        # Size of *this* batch. It is kept in its own name so the notebook-level
        # `batch_size` setting is not overwritten when a batch has a different size.
        b_size = real_cpu.size(0)
        # Target labels for the real batch: all real_label (1).
        label = torch.full((b_size,), real_label, dtype=real_cpu.dtype, device=device)
        # Discriminator's output for the real images.
        output = netD(real_cpu)
        # Binary cross-entropy between that output and the "real" labels.
        errD_real = criterion(output, label)
        # Back-propagate to accumulate gradients in the discriminator's parameters.
        errD_real.backward()
        # Mean discriminator output on real data, for monitoring only.
        D_x = output.mean().item()

        # --- train with fake ---
        # Draw one latent sample per real image and turn it into a generated image.
        noise = torch.randn(b_size, nz, 1, 1, device=device)
        fake = netG(noise)
        # Reuse the label tensor, now filled with fake_label (0).
        label.fill_(fake_label)
        # detach() stops this backward pass from flowing into the generator.
        output = netD(fake.detach())
        errD_fake = criterion(output, label)
        # Gradients from the fake batch add to those from the real batch.
        errD_fake.backward()
        # Mean discriminator output on generated data before the D update, for monitoring only.
        D_G_z1 = output.mean().item()
        # Total discriminator loss, reported in the progress line.
        errD = errD_real + errD_fake
        # Apply the accumulated gradients to the discriminator.
        optimizerD.step()

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        # Clear the generator's gradients.
        netG.zero_grad()
        label.fill_(real_label)  # fake labels are real for generator cost
        # Score the same generated batch again, this time without detach() so
        # gradients reach the generator.
        output = netD(fake)
        errG = criterion(output, label)
        errG.backward()
        # Mean discriminator output on generated data after the D update, for monitoring only.
        D_G_z2 = output.mean().item()
        # Apply the gradients to the generator.
        optimizerG.step()

        print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f'
              % (epoch, num_epochs, i, len(dataloader), errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))

        # Every 100 steps save a real sample and a generated sample for comparison.
        if i % 100 == 0:
            vutils.save_image(real_cpu, 'real_samples.png', normalize=True)
            # Sampling only: no gradients are needed, so skip building the autograd
            # graph, and keep the result in its own name rather than replacing `fake`.
            with torch.no_grad():
                fixed_fake = netG(fixed_noise)
            vutils.save_image(fixed_fake, 'fake_samples_epoch_%03d.png' % epoch, normalize=True)
