# DCGAN + conditional DCGAN


In this notebook you will learn about Generative Adversarial Networks by implementing a DCGAN (Deep Convolutional GAN) to generate images from noise, followed by a conditional DCGAN.

**Important:** Set the Colab environment to run on GPU


In [None]:
import torch
from torch import nn, optim
from torchvision import transforms, datasets, utils
from PIL import Image
import numpy as np
import math
from IPython.display import display
from tqdm import tqdm
# Prefer the GPU, but fall back to the CPU so the notebook still runs
# (slowly) on machines where CUDA is not available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Hyperparameters

In [None]:
# Training schedule
num_epochs = 20

# Adam optimizer settings
lr = 2e-4
betas = (0.5, 0.999)

# Model / data dimensions
noise_size = 100         # length of the noise vector fed to the generator
batch_size = 128
num_val_samples = 25     # number of noise vectors used for preview grids
num_classes = 10
num_input_channels = 1

## Dataset
Download and prepare dataset


In [None]:
# Preprocessing pipeline: resize to 32 pixels, convert to a tensor and
# normalize the single channel with mean 0.5 and std 0.5.
train_transforms = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# MNIST training split, downloaded into the `data` directory if needed.
dataset = datasets.MNIST(
    root='data',
    train=True,
    transform=train_transforms,
    download=True,
)

## Data Loader
Create a data loader for the MNIST dataset

In [None]:
# Shuffled mini-batch iterator over the training dataset.
dataloader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,
)

# DCGAN

## Networks
First, let's define our simple generator.

### Exercise 1: Generator

The generator takes random noise as input and gives an image as output. Your exercise is to create the generator model.

It should follow these guidelines:
* The input will be a vector with random noise of size `noise_size`
* You should first apply a fully connected layer with output size 512\*4\*4 (channels\*height\*width)
* Then you should apply 3 blocks of:
    * TransposedConvolution with kernel size 4, stride 2 and padding 1, and bias=False. For the first 2 blocks, the output channels should be 256 and 128. For the third block, the output channels should be the correct one to generate images of the dataset.
    * BatchNorm2d except for the last block.
    * ReLU activation for the first 2 blocks and Tanh for the third block.

**Hint**: Remember to use reshape where necessary

In [None]:
class Generator(torch.nn.Module):
    """DCGAN generator: maps a noise vector to an image.

    A linear layer expands the noise to 512*4*4 features, which are
    reshaped to a [512, 4, 4] map and passed through three transposed
    convolution blocks (256, 128 and `num_input_channels` output channels).
    The last block ends in Tanh instead of BatchNorm + ReLU.
    """

    def __init__(self):
        super().__init__()

        # Settings shared by every transposed convolution
        upsample = dict(kernel_size=4, stride=2, padding=1, bias=False)

        # Fully connected layer: noise -> 512*4*4 features
        self.fc = nn.Linear(noise_size, 512 * 4 * 4)

        # First block: 512 -> 256 channels
        self.convt1 = nn.Sequential(
            nn.ConvTranspose2d(512, 256, **upsample),
            nn.BatchNorm2d(256),
            nn.ReLU(),
        )
        # Second block: 256 -> 128 channels
        self.convt2 = nn.Sequential(
            nn.ConvTranspose2d(256, 128, **upsample),
            nn.BatchNorm2d(128),
            nn.ReLU(),
        )
        # Third block: 128 -> image channels, no batch norm, Tanh output
        self.convt3 = nn.Sequential(
            nn.ConvTranspose2d(128, num_input_channels, **upsample),
            nn.Tanh(),
        )

    def forward(self, x):
        """Generate images from the noise batch `x` of shape [batch, noise_size]."""
        features = self.fc(x)
        # [batch, 512*4*4] -> [batch, channels, height, width]
        out = features.reshape(-1, 512, 4, 4)
        for block in (self.convt1, self.convt2, self.convt3):
            out = block(out)
        return out

Similarly, let's define a simple discriminator.

### Exercise 2: Discriminator

The discriminator takes an image as input and classifies it between Real or Fake (1 or 0). Your exercise is to create the discriminator model.

It should follow these guidelines:
* The input will be an image of size `[num_input_channels, 32, 32]`
* You should apply 3 blocks of:
    * Convolution with kernel size 4, stride 2 and padding 1. The output channels should be 128, 256 and 512.
    * BatchNorm2d except for the first block.
    * LeakyReLU activation (alpha=0.2)
* Then you should apply a fully connected layer with input size 512\*4\*4 (channels\*height\*width) and the correct output size and activation for binary classification


**Hint**: Remember to use reshape/flatten where necessary

In [None]:
class Discriminator(torch.nn.Module):
    """DCGAN discriminator: scores an image as real (1) or fake (0).

    Three stride-2 convolution blocks (128, 256 and 512 output channels)
    are followed by a linear layer with a sigmoid, giving one value in
    (0, 1) per image. The linear layer expects 512*4*4 features, which
    corresponds to 32x32 input images.
    """

    def __init__(self):
        super().__init__()

        # First block: Conv2d + LeakyReLU (no batch norm on the first block)
        self.conv1 = nn.Sequential(nn.Conv2d(num_input_channels, 128, kernel_size=4, stride=2, padding=1),
                                   nn.LeakyReLU(negative_slope=0.2))
        # Second block: Conv2d + BatchNorm2d + LeakyReLU
        self.conv2 = nn.Sequential(nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),
                                   nn.BatchNorm2d(256),
                                   nn.LeakyReLU(negative_slope=0.2))
        # Third block: Conv2d + BatchNorm2d + LeakyReLU
        self.conv3 = nn.Sequential(nn.Conv2d(256, 512, kernel_size=4, stride=2, padding=1),
                                   nn.BatchNorm2d(512),
                                   nn.LeakyReLU(negative_slope=0.2))
        # Classifier: flattened features -> single probability
        self.fc = nn.Sequential(nn.Linear(512 * 4 * 4, 1),
                                nn.Sigmoid())

    def forward(self, x):
        """Return a [batch, 1] tensor of real/fake probabilities.

        Args:
            x: image batch of shape [batch, num_input_channels, 32, 32].
        """
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        # Keep the batch dimension and flatten channels/height/width
        x = x.flatten(start_dim=1)
        x = self.fc(x)
        # Without this return the module yields None and the loss call fails
        return x


In [None]:
# Networks, moved to the selected device
generator = Generator().to(device)
discriminator = Discriminator().to(device)

# One Adam optimizer per network, sharing the same hyperparameters
optimizer_g = optim.Adam(generator.parameters(), lr=lr, betas=betas)
optimizer_d = optim.Adam(discriminator.parameters(), lr=lr, betas=betas)

# Binary cross-entropy on the discriminator's sigmoid output
criterion = nn.BCELoss()

def init_weights(m):
    """Initialize one module in place; meant to be used with `Module.apply`.

    * Conv2d / ConvTranspose2d / Linear: weight ~ N(0, 0.02); bias, when
      present, set to 0.
    * BatchNorm2d: weight ~ N(1, 0.02) and bias set to 0, when the layer
      has affine parameters.

    Any other module type is left untouched.
    """
    if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
        nn.init.normal_(m.weight, mean=0.0, std=0.02)
        # Layers built with bias=False expose `bias` as None
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.0)
    elif isinstance(m, nn.BatchNorm2d):
        # With affine=False both parameters are None; nothing to initialize
        if m.weight is not None:
            nn.init.normal_(m.weight, 1.0, 0.02)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)

# Re-initialize every layer of both networks
for network in (generator, discriminator):
    network.apply(init_weights)


## Train function

### Exercise 3: Train

Complete the code. Take into account which labels should be used at each step of the training.

In [None]:
def train_batch(real_samples, generator, discriminator, optimizer_g, optimizer_d):
    """Run one GAN training step: update the generator, then the discriminator.

    Args:
        real_samples: batch of real images; expected to be on `device`.
        generator: network mapping noise [N, noise_size] to images.
        discriminator: network mapping images to probabilities [N, 1].
        optimizer_g: optimizer for the generator parameters.
        optimizer_d: optimizer for the discriminator parameters.

    Returns:
        Tuple `(loss_g, loss_d)` with the two losses as Python floats.
    """
    generator.train()
    discriminator.train()

    # Use the actual batch size: the last batch of an epoch can be smaller
    # than `batch_size`, and BCELoss needs targets shaped like the predictions.
    current_batch_size = real_samples.shape[0]
    label_real = torch.ones(current_batch_size, 1, device=device)
    label_fake = torch.zeros(current_batch_size, 1, device=device)

    ####################
    # OPTIMIZE GENERATOR
    ####################

    optimizer_g.zero_grad()

    # Generate fake samples from random noise
    input_noise = torch.randn(current_batch_size, noise_size, device=device)
    fake_samples = generator(input_noise)

    # The generator wants the discriminator to label its samples as real
    predictions_g_fake = discriminator(fake_samples)
    loss_g = criterion(predictions_g_fake, label_real)

    loss_g.backward()
    optimizer_g.step()

    ####################
    # OPTIMIZE DISCRIMINATOR
    ####################

    # Detach so the discriminator update does not backpropagate into the generator
    fake_samples = fake_samples.detach()
    optimizer_d.zero_grad()

    # Real samples should be classified as real
    predictions_d_real = discriminator(real_samples)
    loss_d_real = criterion(predictions_d_real, label_real)

    # Generated samples should be classified as fake
    predictions_d_fake = discriminator(fake_samples)
    loss_d_fake = criterion(predictions_d_fake, label_fake)

    # Total discriminator loss
    loss_d = (loss_d_real + loss_d_fake) / 2
    loss_d.backward()
    optimizer_d.step()

    return loss_g.item(), loss_d.item()


## Evaluation function

In [None]:
@torch.no_grad()
def evaluate(generator, z_val):
    """Render the generator's output for the noise batch `z_val` as one PIL image."""
    generator.eval()
    samples = generator(z_val).cpu()

    # Lay the batch out as a grid with floor(sqrt(batch)) images per row
    per_row = int(math.sqrt(samples.shape[0]))
    grid = utils.make_grid(samples, nrow=per_row)

    # Undo the mean 0.5 / std 0.5 normalization and scale by 255
    grid = (grid * 0.5 + 0.5) * 255

    # Channels-first float tensor -> channels-last uint8 array for PIL
    pixels = grid.numpy().astype(np.uint8)
    pixels = np.transpose(pixels, (1, 2, 0))
    return Image.fromarray(pixels)


## Train loop

In [None]:
# Noise sampled once so every preview grid uses the same inputs
z_val = torch.randn(num_val_samples, noise_size, device=device)

for epoch in range(num_epochs):
    for i, (real_samples, labels) in enumerate(dataloader):
        real_samples = real_samples.to(device)
        loss_g, loss_d = train_batch(
            real_samples, generator, discriminator, optimizer_g, optimizer_d
        )

        # Only report on every 100th batch
        if i % 100 != 0:
            continue

        fake_images = evaluate(generator, z_val)
        display(fake_images)

        # Show current loss
        print(f"epoch: {epoch+1}/{num_epochs} batch: {i+1}/{len(dataloader)} G_loss: {loss_g}, D_loss: {loss_d}")


tensor([[[[-0.0100,  0.0079, -0.0727,  ..., -0.1292,  0.1149, -0.0418],
          [ 0.1523,  0.1335,  0.4080,  ...,  0.2066,  0.1523, -0.0972],
          [ 0.1585, -0.0184,  0.0206,  ...,  0.0888,  0.0336, -0.0150],
          ...,
          [ 0.0923,  0.1345,  0.0869,  ..., -0.3946, -0.0146, -0.1077],
          [-0.0203, -0.1255,  0.1405,  ...,  0.0340,  0.0675,  0.0062],
          [-0.0572,  0.0892, -0.1064,  ..., -0.1321, -0.0578, -0.0334]]],


        [[[-0.0183, -0.0367,  0.0586,  ..., -0.0362, -0.0260, -0.0440],
          [-0.0193, -0.2435, -0.1859,  ..., -0.0048, -0.0739, -0.0395],
          [ 0.1784,  0.2512, -0.1639,  ..., -0.1454,  0.2857,  0.1563],
          ...,
          [ 0.1722, -0.1845,  0.0602,  ...,  0.0731,  0.0115, -0.1452],
          [ 0.1340, -0.0215,  0.1173,  ..., -0.1301,  0.0391,  0.0919],
          [-0.0428, -0.2568,  0.0300,  ..., -0.0883,  0.0587, -0.0830]]],


        [[[ 0.0631, -0.0146, -0.1171,  ..., -0.1210,  0.1282,  0.0295],
          [ 0.1727, -0.080

AttributeError: ignored

# Extra: Conditional GAN

## Networks

### Exercise 4: Generator

We will now modify the generator from before to a conditional generator. To do it, we will concatenate the input to the convolutions with an embedding of the label we want to generate.

Complete the forward method. To do it, use the embedding layer with the label, and then use `torch.cat` to concatenate the label as a channel (after the corresponding `reshape`)

**Hint**: The embedding is concatenated as a new channel.

In [None]:
class ConditionalGenerator(torch.nn.Module):
    """Conditional DCGAN generator: maps (noise, class label) to an image.

    The noise is expanded to a [512, 4, 4] feature map. The label is
    embedded into 4*4 values and concatenated as one extra channel, so the
    first transposed convolution receives 512 + 1 input channels.
    """

    def __init__(self):
        super().__init__()

        # Fully connected layer: noise -> 512*4*4 features
        self.fc = nn.Linear(noise_size, 512 * 4 * 4)

        # One learnable 4x4 map per class, used as an extra channel
        self.embedding = nn.Embedding(num_classes, 4 * 4)

        # First block: +1 input channel for the concatenated label embedding
        self.convt1 = nn.Sequential(nn.ConvTranspose2d(512 + 1, 256, kernel_size=4, stride=2, padding=1, bias=False),
                                    nn.BatchNorm2d(256),
                                    nn.ReLU())
        # Second block: ConvTranspose2d + BatchNorm2d + ReLU
        self.convt2 = nn.Sequential(nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1, bias=False),
                                    nn.BatchNorm2d(128),
                                    nn.ReLU())
        # Third block: ConvTranspose2d + Tanh, output has the image channels
        self.convt3 = nn.Sequential(nn.ConvTranspose2d(128, num_input_channels, kernel_size=4, stride=2, padding=1, bias=False),
                                    nn.Tanh())

    def forward(self, x, label):
        """Generate images conditioned on `label`.

        Args:
            x: noise batch of shape [batch, noise_size].
            label: integer class indices, one per sample.
        """
        x = self.fc(x)
        # [batch, 512*4*4] -> [batch, 512, 4, 4]
        x = x.reshape(-1, 512, 4, 4)

        # Shape the label embedding as a single-channel 4x4 map and append it
        emb = self.embedding(label).view(-1, 1, 4, 4)
        x = torch.cat([x, emb], dim=1)

        x = self.convt1(x)
        x = self.convt2(x)
        x = self.convt3(x)
        return x

### Exercise 5: Discriminator

We will now modify the discriminator from before to a conditional discriminator. To do it, we will concatenate the input image with an embedding of its label.

Complete the forward method. To do it, use the embedding layer with the label, and then use `torch.cat` to concatenate the label as a channel (after the corresponding `reshape`)

**Hint**: The embedding is concatenated as a new channel.

In [None]:
class ConditionalDiscriminator(torch.nn.Module):
    """Conditional DCGAN discriminator: scores an (image, label) pair as real or fake.

    The label is embedded into 32*32 values and concatenated to the image
    as one extra channel, so the first convolution receives
    `num_input_channels + 1` input channels.
    """

    def __init__(self):
        super().__init__()

        # One learnable 32x32 map per class, used as an extra image channel
        self.embedding = nn.Embedding(num_classes, 32 * 32)

        # First block: Conv2d + LeakyReLU, +1 input channel for the label map
        self.conv1 = nn.Sequential(nn.Conv2d(num_input_channels + 1, 128, kernel_size=4, stride=2, padding=1),
                                   nn.LeakyReLU(negative_slope=0.2))
        # Second block: Conv2d + BatchNorm2d + LeakyReLU
        self.conv2 = nn.Sequential(nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),
                                   nn.BatchNorm2d(256),
                                   nn.LeakyReLU(negative_slope=0.2))
        # Third block: Conv2d + BatchNorm2d + LeakyReLU
        self.conv3 = nn.Sequential(nn.Conv2d(256, 512, kernel_size=4, stride=2, padding=1),
                                   nn.BatchNorm2d(512),
                                   nn.LeakyReLU(negative_slope=0.2))
        # Classifier: flattened features -> single probability
        self.fc = nn.Sequential(nn.Linear(512 * 4 * 4, 1),
                                nn.Sigmoid())

    def forward(self, x, label):
        """Return a [batch, 1] tensor of real/fake probabilities.

        Args:
            x: image batch of shape [batch, num_input_channels, 32, 32].
            label: integer class indices, one per image.
        """
        # Shape the label embedding as a single-channel 32x32 map and append it
        emb = self.embedding(label).view(-1, 1, 32, 32)
        x = torch.cat([x, emb], dim=1)

        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x.flatten(start_dim=1)
        x = self.fc(x)
        return x


In [None]:
# Conditional networks, moved to the selected device
generator = ConditionalGenerator().to(device)
discriminator = ConditionalDiscriminator().to(device)

# One Adam optimizer per network, sharing the same hyperparameters
adam_settings = dict(lr=lr, betas=betas)
optimizer_g = torch.optim.Adam(generator.parameters(), **adam_settings)
optimizer_d = torch.optim.Adam(discriminator.parameters(), **adam_settings)

# Binary cross-entropy on the discriminator's sigmoid output
criterion = nn.BCELoss()

def init_weights(m):
    """Initialize a single module in place (for use with `Module.apply`).

    Convolution, transposed-convolution and linear layers get weights drawn
    from N(0, 0.02) and a zero bias when a bias exists. BatchNorm2d layers
    get weights drawn from N(1, 0.02) and a zero bias when they have affine
    parameters. Other modules are not modified.
    """
    weighted_layers = (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)

    if isinstance(m, weighted_layers):
        nn.init.normal_(m.weight, mean=0.0, std=0.02)
        # `bias` is None for layers created with bias=False
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.0)
        return

    if isinstance(m, nn.BatchNorm2d):
        # Both parameters are None when the layer was built with affine=False
        if m.weight is not None:
            nn.init.normal_(m.weight, 1.0, 0.02)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)

# Re-initialize every layer of both conditional networks
for network in (generator, discriminator):
    network.apply(init_weights)


## Train function

In [None]:
def train_batch_conditional(real_samples, real_labels, generator, discriminator, optimizer_g, optimizer_d):
    """Run one conditional-GAN training step: generator first, then discriminator.

    Args:
        real_samples: batch of real images; expected to be on `device`.
        real_labels: integer class labels of `real_samples`, one per image.
        generator: network called as `generator(noise, labels)`.
        discriminator: network called as `discriminator(images, labels)`.
        optimizer_g: optimizer for the generator parameters.
        optimizer_d: optimizer for the discriminator parameters.

    Returns:
        Tuple `(loss_g, loss_d)` with the two losses as Python floats.
    """
    generator.train()
    discriminator.train()
    current_batch_size = real_samples.shape[0]

    # Targets for the discriminator output, of size [batch_size, 1]
    label_real = torch.ones(current_batch_size, 1, device=device)
    label_fake = torch.zeros(current_batch_size, 1, device=device)

    ####################
    # OPTIMIZE GENERATOR
    ####################

    optimizer_g.zero_grad()

    # Random classes to condition the generated samples on
    fake_labels = torch.randint(0, num_classes, (current_batch_size, 1), device=device)
    input_noise = torch.randn(current_batch_size, noise_size, device=device)
    fake_samples = generator(input_noise, fake_labels)

    # The generator wants its samples to be judged real for the requested class
    predictions_g_fake = discriminator(fake_samples, fake_labels)
    loss_g = criterion(predictions_g_fake, label_real)

    loss_g.backward()
    optimizer_g.step()

    ####################
    # OPTIMIZE DISCRIMINATOR
    ####################

    # Detach so the discriminator update does not backpropagate into the generator
    fake_samples = fake_samples.detach()
    optimizer_d.zero_grad()

    # Real images paired with their true labels should be classified as real
    predictions_d_real = discriminator(real_samples, real_labels)
    loss_d_real = criterion(predictions_d_real, label_real)

    # Generated images paired with their conditioning labels should be classified as fake
    predictions_d_fake = discriminator(fake_samples, fake_labels)
    loss_d_fake = criterion(predictions_d_fake, label_fake)

    # Total discriminator loss
    loss_d = (loss_d_real + loss_d_fake) / 2

    loss_d.backward()
    optimizer_d.step()

    return loss_g.item(), loss_d.item()


## Evaluation function

In [None]:
@torch.no_grad()
def evaluate_conditional(generator, z_val, labels_val):
    """Render the conditional generator's output for (`z_val`, `labels_val`) as one PIL image."""
    generator.eval()
    generated = generator(z_val, labels_val).cpu()

    # Grid with floor(sqrt(batch)) images per row
    grid = utils.make_grid(generated, nrow=int(math.sqrt(generated.shape[0])))

    # Undo the mean 0.5 / std 0.5 normalization and scale by 255
    grid = (grid * 0.5 + 0.5) * 255

    # To a uint8 numpy array, then channels-first -> channels-last for PIL
    as_uint8 = grid.numpy().astype(np.uint8)
    return Image.fromarray(np.transpose(as_uint8, (1, 2, 0)))


## Train loop

In [None]:
from itertools import cycle

# Noise sampled once so every preview grid uses the same inputs
z_val = torch.randn(num_val_samples, noise_size, device=device)

# Validation labels repeat 0, 1, ..., num_classes - 1 until there is one per sample
labels_cycle = cycle(range(num_classes))
labels_val = torch.tensor(
    [next(labels_cycle) for _ in range(num_val_samples)],
    device=device,
).unsqueeze(1)

for epoch in range(num_epochs):
    for i, (real_samples, real_labels) in enumerate(dataloader):
        real_samples = real_samples.to(device)
        real_labels = real_labels.unsqueeze(1).to(device)
        loss_g, loss_d = train_batch_conditional(
            real_samples, real_labels, generator, discriminator, optimizer_g, optimizer_d
        )

        # Only report on every 100th batch
        if i % 100 != 0:
            continue

        fake_images = evaluate_conditional(generator, z_val, labels_val)
        display(fake_images)

        # Show current loss
        print(f"epoch: {epoch+1}/{num_epochs} batch: {i+1}/{len(dataloader)} G_loss: {loss_g}, D_loss: {loss_d}")


In [None]:
# Change `number_chosen` to visualize samples of a different digit
number_chosen = 2
z_val = torch.randn(num_val_samples, noise_size, device=device)

# The same label repeated once per noise vector, shaped [num_val_samples, 1]
number_chosen_torch = torch.tensor([number_chosen] * num_val_samples, device=device).unsqueeze(1)

images_number_chosen = evaluate_conditional(generator, z_val, number_chosen_torch)
display(images_number_chosen)