In [6]:
# Resource tutorial: https://www.udemy.com/course/generative-creative-ai-from-gans-to-clip-with-python-and-pytorch/learn/lecture/27451002?start=360#overview

In [5]:
import pdb # python debugger
import torch
from torch.utils.data import DataLoader # allow batch feeds of data
from torch import nn # build deep learning models
from torchvision import transforms # allow us to transform training data
from torchvision.datasets import MNIST
from torchvision.utils import make_grid # make grid of images for checks

In [9]:
from tqdm.auto import tqdm # make a progress bar during training

In [10]:
import matplotlib.pyplot as plt

In [11]:
# visualization function

In [24]:
def show(tensor, ch=1, size=(28,28), num=16): 
    '''
    @param ch : colour channel. we use gray scale so channel = 1
    @param size : size of mnist data set is 28x28 pixels
    @num : number of images to show in the grid 
    '''
    data=tensor.detach().cpu().view(-1,ch,*size) # detach variable from computation of gradients
    grid= make_grid(data[:num], nrow=4).permute(1,2,0)
    plt.imshow(grid)
    plt.show()

In [13]:
# set up the main parameters and hyperparameters
epochs = 500
cur_step = 0
info_step = 300
mean_gen_loss = 0
mean_disc_loss = 0

z_dim = 64 
lr = 0.00001 # learning rate. speed for pushing to negative gradient
loss_func = nn.BCEWithLogitsLoss() 

bs = 128
device = 'cuda'

# DataLoader = batch the data
# MNIST = download mnist data to current folder, transform to tensor and shuffle
dataloader = DataLoader(MNIST('.', download=True, transform=transforms.ToTensor()), shuffle=True, batch_size=bs)

# number of steps = 60000 / 128 = 468.75 
# in every epoch, it will have 469 steps

In [14]:
# Declare our models

In [15]:
# Generator 

# Step 1. Create generator Block
def genBlock(inp, out):
    '''
    @param inp : input size
    @param out : output size
    '''
    return nn.Sequential(
        # linear computations for Wx + B
        nn.Linear(inp, out), 
        # 1 dimensional black and white images with single channel 
        # normalise the values from previous layers to help stablise training
        nn.BatchNorm1d(out), 
        # apply non-linearity ReLU function. 
        # returns whatever arrives as negative values as 0, and lets positive values pass
        # helps neural network learn more complex functions`
        nn.ReLU(inplace=True)
    )
    

In [16]:
# Step 2. Create generator class
'''
Generator will recieve as input a NOISE vector of dimensionality 64 
Will produce an output of image 28 * 28 = 784 ... 
'''

class Generator(nn.Module):
    def __init__(self, z_dim=64, i_dim=784, h_dim=128):
        '''
        @param z_dim : noise vector of dimensionality 64
        '''
        super().__init__()
        self.gen = nn.Sequential(
            # small layers to big layers
            genBlock(z_dim, h_dim), # 64 -> 128
            genBlock(h_dim, h_dim*2), # 128 -> 256
            genBlock(h_dim*2, h_dim*4), # 256 -> 512
            genBlock(h_dim*4, h_dim*8), # 512 -> 1024
            nn.Linear(h_dim*8, i_dim), #1024, 784 (28*28)
            nn.Sigmoid()
        )
    
    def forward(self, noise):
        return self.gen(noise)
    
'''
randomly sample noise vectors to correspond with size of batch    
'''
def gen_noise(number, z_dim):
    return torch.randn(number, z_dim).to(device) # store input of generator stores into gpu   

In [17]:
# Discriminator

In [18]:
'''
We want to know if image is real or fake
input is image 28 * 28 
output of descriminator is 1 value. 
output: 1 if real, 0 if fake
'''
def discBlock(inp, out):
    return nn.Sequential(
        # linear layer input size to output size
        nn.Linear(inp, out),
        # non linearity to learn complex mappings
        # LeakyReLU -> non linear function that helps with dying relu.
        #.             some neurons may die and generate just gradients of 0.. and not learn
        #              leaky rely gives small negative values on a small slope.
        #              stops neurons dying and solves dying relu problem 
        nn.LeakyReLU(0.2) 
    )

In [19]:
class Discriminator(nn.Module):
    def __init__(self, i_dim=784, h_dim=256):
        super().__init__()
        self.disc=nn.Sequential(
            # big layers to smaller layers
            discBlock(i_dim, h_dim*4), # 784 -> 1034 ... dimension of image to dimension of hidden layer 
            discBlock(h_dim*4, h_dim*2), # 1024 -> 512 
            discBlock(h_dim*2, h_dim), # 512 -> 256
            nn.Linear(h_dim, 1) # 256 -> 1
        )
        
    def forward(self, image):
        return self.disc(image)

In [20]:
gen = Generator(z_dim).to(device)
# optimizer 
# ... calculate the gradients with back propagation + chain rule 
# ... tweak parameters of generator and discriminator by applying learning rate
gen_opt = torch.optim.Adam(gen.parameters(), lr=lr)

AssertionError: Torch not compiled with CUDA enabled

In [30]:
gen = Generator(z_dim)
# optimizer 
# ... calculate the gradients with back propagation + chain rule 
# ... tweak parameters of generator and discriminator by applying learning rate
gen_opt = torch.optim.Adam(gen.parameters(), lr=lr)

In [31]:
disc = Discriminator()
disc_opt = torch.optim.Adam(disc.parameters(), lr=lr)

In [23]:
gen

Generator(
  (gen): Sequential(
    (0): Sequential(
      (0): Linear(in_features=64, out_features=128, bias=True)
      (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (1): Sequential(
      (0): Linear(in_features=128, out_features=256, bias=True)
      (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (2): Sequential(
      (0): Linear(in_features=256, out_features=512, bias=True)
      (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (3): Sequential(
      (0): Linear(in_features=512, out_features=1024, bias=True)
      (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (4): Linear(in_features=1024, out_features=784, bias=True)
    (5): Sigmoid()
  )
)

In [26]:
# next -> give next batch
# iter -> cast data loader data as iterator data structure
x,y = next(iter(dataloader))

In [28]:
print(x.shape, y.shape)
print(y[:10])

torch.Size([128, 1, 28, 28]) torch.Size([128])
tensor([7, 2, 6, 6, 4, 3, 2, 5, 4, 0])


In [32]:
noise = gen_noise(bs, z_dim) # create noise vector
fake = gen(noise) # pass noise through generator model
show(fake)

AssertionError: Torch not compiled with CUDA enabled

In [1]:
# calculating the loss 

# generator loss
def calc_gen_loss(loss_func, gen, disc, number, z_dim):
    '''
    @loss func : loss function
    @param gen : generator model
    @param disc : disciminator model
    @number : number of elements to process
    @z_dim : dimension of input latent vector
    '''
    # produce a noise vector of size of the batch vector
    noise = gen_noise(number, z_dim) 
    # use generator class instance model to take noise and generate the fake data
    fake = gen(noise)
    # take fake output of generator and pass it through discriminator and store output as pred
    pred = disc(fake)
    
    # create a tensor with dimensionality similar to preds 
    # with all values set to 1
    targets = torch.ones_like(pred)
    
    # apply loss function which compares predictions with the targets 
    # targets is a vector of 1, which means all are real
    gen_loss = loss_func(pred, targets)
    
    return gen_loss

In [2]:
def calc_disc_loss(loss_func, gen, disc, number, real, z_dim):
    '''
    discriminator loss function. Similar to above function
    '''
    # noise vectors
    noise = gen_noise(number, z_dim) 
    #  generator model makes fake images
    fake = gen(noise) 
    # discrimintor model with DETACH(). 
    # when we backpropagate, the loss of the discriminator to calculate gradients
    # we do NOT want to change parameters of generator, when we are optimising discriminator
    disc_fake = disc(fake.detach()) 
    
    # create a tensor of ZEROES with same dimension as FAKE images
    disc_fake_targets = torch.zeros_like(disc_fake)
    # compare fake images to targets
    disc_fake_loss = loss_func(disc_fake, disc_fake_targets)
    
    disc_real = disc(real)
    # create a tensor of ONES with same dimension as REAL images
    disc_real_targets=torch.ones_like(disc_real)
    disc_real_loss=loss_func(disc_real, disc_real_targets)
    
    # Final loss is the average of the discriminator loss of both fake and real images
    disc_loss =(disc_fake_loss+disc_real_loss)/2
    
    return disc_loss

In [3]:
# Training Loop

### 60000 / 128 = 468.75 = 469 steps in each epoch
### each step is going to process 128 images = size of batch - except last step

In [4]:
for epoch in range(epochs):
    # we get images from mnist
    # store images in variable real
    # labels returned but no need to store as variable as this is not supervised learning
    for real, _ in dataloader: 
        ### discriminator
        disc_opt.zero_grad()
        
        # size of real is batch of 128 images x one channel 
        # 128 * 1 * 28 * 28
        cur_bs = len(real)
        # reshape real images as size of batch
        real = real.view(cur_bs, -1)  # 128 * 784
        real = real.to(device)
        
        disc_loss = calc_disc_loss(loss_func, gen, disc, cur_bs, real, z_dim)
        
        # backward function takes loss value and backpropagrate to calculate gradients 
        # across neural network
        # @param: retain_graph 
        disc_loss.backward(retain_graph=True)
        disc_opt.step()
        
        ### generator
        gen_opt.zero_grad()
        gen_loss = calc_gen_loss(loss_func, gen, disc, cur_bs, z_dim)
        gen_loss.backward(retain_graph=True)
        gen_opt.step()
        
        ### visualization & stats
        mean_disc_loss += disc_loss.item()/info_step # item transform tensor value into a single number
        mean_gen_loss += gen_loss.item()/info_step
        
        ### show information
        if cur_step % info_step == 0 and cur_step > 0:
            # compare fake generated images with real images
            fake_noise = gen_noise(cur_bs, z_dim)
            fake = gen(fake_noise)
            show(fake)
            show(real)
            print(f"{epoch}: step {cur_step} / Gen loss: {mean_gen_loss} / disc loss: {mean_disc_loss}")
            mean_gen_loss, mean_disc_loss = 0,0
        cur_step+=1

NameError: name 'epochs' is not defined