# Generative adversarial network (GAN) for [MNIST](http://yann.lecun.com/exdb/mnist/) handwritten digit dataset

During the workshop, you were shown how to implement a previously trained GAN. The task of building and training your own neural network that generates MRI data is an arduous one, often worthy of a PhD.\
\
Today, you will be given a few tasks that will allow you to examine a more basic GAN implementation found [here](https://github.com/lyeoni/pytorch-mnist-GAN/blob/master/pytorch-mnist-GAN.ipynb), that uses PyTorch and the MNIST handwritten digit dataset (you can find another, almost identical, script to do this [here](https://github.com/jsuarez5341/GAN-MNIST-Pytorch/blob/master/main.py)).\
\
This model is a lot simpler than what you would need to generate high-resolution and 3D MR images, but it will allow you to gain a better intuition for the architecture of GANs. Have fun filling in the blanks! :)

# Part 1: fill in the missing code

In [None]:
# prerequisites
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from torchvision.utils import save_image
import matplotlib.pyplot as plt
import os

# Device configuration: use the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Number of images per mini-batch; also read by the training functions below
BATCH_SIZE = 100

# Convert images to tensors, then normalize with mean 0.5 / std 0.5
# (presumably to match the [-1, 1] range of the generator's tanh output)
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

# MNIST Dataset
train_dataset = datasets.MNIST(root='./mnist_data/', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./mnist_data/', train=False, transform=transform, download=False)

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# len(loader) is the number of *batches*; the number of instances is the length of the dataset itself
print(f"You have {len(train_dataset)} training instances and {len(test_dataset)} testing instances.")

Take a quick look at your data

In [None]:
# Create an iterator over the training batches.
# NOTE(review): `it` is not read in the cells visible here (the next cell builds its own iterator) - confirm it is still needed.
it=iter(train_loader)

In [None]:
# Peek at the first three batches and print the shape of each image tensor
dataiter = iter(train_loader)
for i in range(3):
    # Use the built-in next(): the iterator's .next() method is the Python 2 spelling
    # and is not available on DataLoader iterators in recent PyTorch releases
    images, labels = next(dataiter)
    print(images.shape)

**Questions:** \
(a) What information does each of the tensor shapes printed above give you? \
(b) Are these images in grayscale or in color? \
(c) How many channels are there in these images?

Fill in the missing lines of code below (they start with `# ============`)

In [None]:
class Generator(nn.Module):
    """Fully connected generator: maps a noise vector to a flattened image.

    The last layer is followed by ``tanh``, so every output value lies in [-1, 1].
    This is an exercise template: the 3rd linear layer and part of ``forward``
    are intentionally missing (see the ``# ============`` markers).
    """

    def __init__(self, g_input_dim, g_output_dim):
        """Create the linear layers.

        Args:
            g_input_dim: number of input features of the first layer (size of the noise vector).
            g_output_dim: number of output features of the last layer.
        """
        super(Generator, self).__init__()
        self.fc1 = nn.Linear(g_input_dim, 256)
        # each hidden layer doubles the width of the previous one (256 -> 512 here)
        self.fc2 = nn.Linear(self.fc1.out_features, self.fc1.out_features*2)
# ============ Add a 3rd linear layer here
        # fc4 reads its input width from fc3, so fc3 must be defined before this line runs
        self.fc4 = nn.Linear(self.fc3.out_features, g_output_dim)

    # forward method
    def forward(self, x):
        """Pass ``x`` through the layers and return the tanh of the last layer's output."""
        x = F.leaky_relu(self.fc1(x), 0.2)
# ============ Pass through your 2nd linear layer, then through a Leaky Relu function (slope=0.2)
# ============ Pass through your 3rd linear layer, then through a Leaky Relu function (slope=0.2)
        return torch.tanh(self.fc4(x))
    
class Discriminator(nn.Module):
    """Fully connected discriminator for flattened images.

    This is an exercise template: the 4th linear layer and most of ``forward``
    are intentionally missing (see the ``# ============`` markers).
    """

    def __init__(self, d_input_dim):
        """Create the linear layers.

        Args:
            d_input_dim: number of input features of the first layer (size of a flattened image).
        """
        super(Discriminator, self).__init__()
        # each hidden layer halves the width of the previous one (1024 -> 512 -> 256)
        self.fc1 = nn.Linear(d_input_dim, 1024)
        self.fc2 = nn.Linear(self.fc1.out_features, self.fc1.out_features//2)
        self.fc3 = nn.Linear(self.fc2.out_features, self.fc2.out_features//2)
# ============ add a 4th linear layer that outputs 1 feature

    # forward method
    def forward(self, x):
        """Pass ``x`` through the layers; the finished version returns a sigmoid score per input row."""
        x = F.leaky_relu(self.fc1(x), 0.2)
        # F.dropout defaults to training=True, so tie it to the module's mode;
        # otherwise dropout would stay active even after calling D.eval()
        x = F.dropout(x, 0.3, training=self.training)
# ============ Pass through your 2nd linear layer, then through a Leaky Relu function (slope=0.2)
# ============ Add a dropout layer that zeros input elements with probability 0.3 (pass training=self.training, as above)
# ============ Pass through your 3rd linear layer, then through a Leaky Relu function (slope=0.2)
# ============ Add a dropout layer that zeros input elements with probability 0.3 (pass training=self.training, as above)
# ============ Pass your output through your 4th linear layer, then a sigmoid function, and return the result!

In [None]:
# build network
z_dim = 100
mnist_dim = train_dataset.train_data.size(1) * train_dataset.train_data.size(2)

G = Generator(g_input_dim = z_dim, g_output_dim = mnist_dim).to(device)
D = Discriminator(mnist_dim).to(device)

In [None]:
# Display the generator (a bare expression at the end of a notebook cell shows its repr)
G

In [None]:
# Display the discriminator (a bare expression at the end of a notebook cell shows its repr)
D

In [None]:
# loss: binary cross-entropy, shared by the discriminator and generator updates
criterion = nn.BCELoss()

# optimizer: the same learning rate is intended for both networks
lr = 2e-4
G_optimizer = optim.Adam(params=G.parameters(), lr=lr)
# ============ Define your discriminator Adam optimizer and call it D_optimizer

In [None]:
def D_train(x):
    """Run one discriminator update on a batch of real images.

    The discriminator is scored on the real batch (target 1) and on an equally
    sized batch of generated images (target 0); the two losses are summed,
    backpropagated, and one optimizer step is taken on D's parameters.

    Args:
        x: batch of real images; it is flattened to shape (-1, mnist_dim).

    Returns:
        The combined real + fake loss of this step as a Python float.
    """
    ############### Train the discriminator ###############
    D.zero_grad()

    # train discriminator on real
    x_real = x.view(-1, mnist_dim).to(device)
    # Size the labels and the noise from the batch actually received, not from the
    # global BATCH_SIZE: the last batch of an epoch is smaller whenever the dataset
    # size is not a multiple of the batch size, and the loss needs matching shapes.
    batch_size = x_real.size(0)
    y_real = torch.ones(batch_size, 1, device=device)

    D_real_loss = criterion(D(x_real), y_real)

    # train discriminator on fake
    z = torch.randn(batch_size, z_dim, device=device)
    # detach(): this step only updates D, so do not backpropagate into G
    x_fake = G(z).detach()
    y_fake = torch.zeros(batch_size, 1, device=device)

    D_fake_loss = criterion(D(x_fake), y_fake)

    # gradient backprop & optimize ONLY D's parameters
    D_loss = D_real_loss + D_fake_loss
    D_loss.backward()
    D_optimizer.step()

    return D_loss.item()

In [None]:
def G_train(x):
    """Run the generator's half of a training step and return its loss as a float.

    A batch of BATCH_SIZE noise vectors is pushed through G and then D, and the
    result is scored against all-ones targets. ``x`` is not read by this function.
    """
    ############### Train the generator ###############
# ============ Reset your generator parameter gradient to zero

    noise = torch.randn(BATCH_SIZE, z_dim).to(device)
    # all-ones targets: the loss is low when D scores the generated batch close to 1
    real_labels = torch.ones(BATCH_SIZE, 1).to(device)

    verdict = D(G(noise))
    G_loss = criterion(verdict, real_labels)

    # gradient backprop & optimize ONLY G's parameters
    G_loss.backward()
# ============ Update your generator parameters

    return G_loss.item()

In [None]:
# Train for N_EPOCH passes over the training set, alternating one discriminator
# update and one generator update per batch, and report the mean losses per epoch.
N_EPOCH = 100
for epoch in range(1, N_EPOCH + 1):
    D_losses = []
    G_losses = []
    for batch_idx, (x, _) in enumerate(train_loader):
        D_losses.append(D_train(x))
        G_losses.append(G_train(x))

    # average the per-batch losses collected during this epoch
    mean_d_loss = torch.tensor(D_losses, dtype=torch.float32).mean()
    mean_g_loss = torch.tensor(G_losses, dtype=torch.float32).mean()
    print('[%d/%d]: loss_d: %.3f, loss_g: %.3f' % (epoch, N_EPOCH, mean_d_loss, mean_g_loss))

In [None]:
# directory where you will save a tile of your fake images
# (exist_ok=True lets this cell be re-run without raising FileExistsError)
os.makedirs("generated_digits_part_1", exist_ok=True)
with torch.no_grad():  # sampling only: no gradients are needed
    test_z = torch.randn(BATCH_SIZE, z_dim).to(device)
    generated = G(test_z)
    # reshape the flat generator output back to 1-channel 28x28 images before saving the tile
    # NOTE(review): G ends in tanh, so values lie in [-1, 1]; confirm save_image's default handling of that range is intended
    save_image(generated.view(generated.size(0), 1, 28, 28), f'generated_digits_part_1/sample_{BATCH_SIZE}batchSize{BATCH_SIZE}_{N_EPOCH}epochs.png')

In [None]:
import glob

# take a look at your fake images
# `display` is imported explicitly so the cell does not rely on the name being injected by the notebook
from IPython.display import Image, display

# sorted(): glob does not guarantee any order, so sort for a reproducible listing
for im in sorted(glob.glob('generated_digits_part_1/*.png')):
    print(f"{im} :")
    display(Image(im))

# Part 2: testing different batch sizes and numbers of epochs
Convert the code above into two nested loops that iterate through a list of batch sizes and a list of numbers of epochs. Make sure to save your generated images with a meaningful naming convention (see the example above). \
**Tip:** We recommend nesting your N_EPOCH loop within the code where it was defined above, so that you do not have to reload the data on every iteration. You will, however, have to reload the data each time the batch size changes.

In [None]:
# directory where you will store your outputs
# (exist_ok=True lets this cell be re-run without raising FileExistsError)
os.makedirs("generated_digits_part_2", exist_ok=True)