In [1]:
# import the libraries

import torch, pdb # pdb is for debugging
from torch.utils.data import DataLoader # build iterable training data
from torch import nn # build deep learning architectures and models
from torchvision import transforms # allow us to transform our data in different ways
from torchvision.datasets import MNIST # dataset composed of images of numbers
from torchvision.utils import make_grid # to build a grid of images to evaluate during the training
from tqdm.auto import tqdm # for the bar showing the progress of the process
import matplotlib.pyplot as plt # plot image with all the visuals that we want

In [2]:
# we want to show a grid with a number of generated images vs. real images
# so we need some way to basically visualise this grid

# visualisation function
def show_tensor_images(image_tensor, ch=1, num_images=25, size=(28, 28)):
  """Plot the first `num_images` images of a batch as a grid, 5 images per row.

  Args:
    image_tensor: tensor holding a batch of images. It is reshaped to
      (-1, ch, *size), so a flattened batch such as 128 x 784 is accepted.
    ch: number of channels of each image.
    num_images: how many images, taken from the start of the batch, are drawn.
    size: (height, width) of a single image.

  Returns:
    None. The grid is drawn with matplotlib and shown immediately.
  """
  # drop the autograd history and bring the data to the CPU before plotting
  images = image_tensor.detach().cpu()
  images = images.view(-1, ch, *size) # e.g. 128 x 784 -> 128 x 1 x 28 x 28
  # tile the selected images into one channels-first picture
  tiled = make_grid(images[:num_images], nrow=5)
  # imshow expects the channel axis last: C x H x W -> H x W x C
  plt.imshow(tiled.permute(1, 2, 0))
  plt.show()

In [3]:
# training-loop configuration and bookkeeping

epochs = 500 # number of full passes over the training data
cur_step = 0 # step counter; one step processes one batch
info_step = 300 # interval measured in steps - presumably how often progress is reported (confirm against the training loop)
mean_gen_loss = 0 # mean generator loss, starts at 0
mean_disc_loss = 0 # mean discriminator loss, starts at 0

# hyperparameters

z_dim = 64 # length of the noise vector fed to the generator
lr = 1e-5 # learning rate
loss_func = nn.BCEWithLogitsLoss() # binary cross-entropy that takes raw logits (the sigmoid is applied inside the loss)

bs = 128 # batch size: number of images processed per step
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # use the GPU when one is available, otherwise the CPU

# iterable over the MNIST images, served in shuffled batches
dataloader = DataLoader(
    MNIST('.', download=True, transform=transforms.ToTensor()), # root folder '.', download enabled, images converted to tensors
    batch_size=bs,
    shuffle=True, # reorder the data at every epoch
)

# steps per epoch = 60000 (MNIST size) / 128 = 468.75, i.e. 469 batches

100%|██████████| 9.91M/9.91M [00:00<00:00, 11.5MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 338kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 2.72MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 8.17MB/s]


In [4]:
# declare our models

# generator
def genBlock(inp, out):
  """Build one hidden stage of the generator: Linear -> BatchNorm1d -> ReLU.

  Args:
    inp: number of input features.
    out: number of output features.

  Returns:
    An nn.Sequential holding the three layers in that order.
  """
  layers = [
      nn.Linear(inp, out), # affine map from inp to out features
      nn.BatchNorm1d(out), # normalise each of the `out` features across the batch
      nn.ReLU(inplace=True), # non-linearity, so stacked blocks can learn more than a linear map
  ]
  return nn.Sequential(*layers)

class Generator(nn.Module):
  """Fully connected generator: maps a noise vector to a flattened image.

  Four genBlock stages widen the features from z_dim to h_dim * 8, then a
  final Linear layer projects to i_dim values and a Sigmoid squashes them
  into [0, 1].

  Args:
    z_dim: length of the input noise vector.
    i_dim: number of output values per sample (784 = 28 x 28 by default).
    h_dim: base width of the hidden layers.
  """
  def __init__(self, z_dim = 64, i_dim = 784, h_dim = 128):
    super().__init__()
    # feature widths z_dim -> h -> 2h -> 4h -> 8h (64 -> 128 -> 256 -> 512 -> 1024 by default)
    widths = [z_dim] + [h_dim * mult for mult in (1, 2, 4, 8)]
    hidden = [genBlock(src, dst) for src, dst in zip(widths[:-1], widths[1:])]
    self.gen = nn.Sequential(
        *hidden,
        nn.Linear(widths[-1], i_dim), # 1024 -> 784 by default: one value per output pixel
        nn.Sigmoid(), # keep every output value in [0, 1]
    )

  def forward(self, noise):
    """Run `noise` through the generator stack and return its output."""
    return self.gen(noise)

def gen_noise(number, z_dim):
  """Sample `number` noise vectors of length `z_dim` from a standard normal distribution.

  Returns:
    A (number, z_dim) tensor of N(0, 1) samples, moved to the module-level `device`.
  """
  noise = torch.randn(number, z_dim) # mean 0, variance 1
  return noise.to(device)

############################# ##########################

# discriminator
def discBlock(inp, out):
  """Build one hidden stage of the discriminator: Linear -> LeakyReLU(0.2).

  Args:
    inp: number of input features.
    out: number of output features.

  Returns:
    An nn.Sequential holding the two layers in that order.
  """
  linear = nn.Linear(inp, out)
  # negative inputs are scaled by 0.2 instead of being clamped to 0
  activation = nn.LeakyReLU(0.2, inplace=True)
  return nn.Sequential(linear, activation)

class Discriminator(nn.Module):
  """Fully connected discriminator: maps a flattened image to a single score.

  Three discBlock stages narrow the features from i_dim down to h_dim, then
  a final Linear layer produces one value per sample. No sigmoid is applied,
  so the output is a raw logit suitable for a logits-based loss such as
  nn.BCEWithLogitsLoss.

  Args:
    i_dim: number of input values per sample (784 = 28 x 28 by default).
    h_dim: width of the last hidden layer; earlier layers are multiples of it.
  """
  def __init__(self, i_dim = 784, h_dim = 256):
    super().__init__()
    self.disc = nn.Sequential(
        discBlock(i_dim, h_dim * 4), # 784 -> 1024
        discBlock(h_dim * 4, h_dim * 2), # 1024 -> 512
        discBlock(h_dim * 2, h_dim), # 512 -> 256
        # in_features must equal the h_dim outputs of the previous block;
        # any other width makes the forward pass fail with a shape mismatch
        nn.Linear(h_dim, 1) # 256 -> 1
    )

  def forward(self, image):
    """Return the discriminator output for `image`, one value per sample."""
    return self.disc(image)
