In [11]:
import torch
import torchvision
import torch.nn as nn # Neural Network Modules suchas nn.Linear, nn.Conv, nn.BatchNorm, Loss Functions, etc.
import torch.optim as optim # Optimization algorithms such as Adam, SGD, etc.
import torchvision.datasets as datasets # For importing standard datasets
import torchvision.transforms as transforms # Transformations that can be performed on the datasets
from torch.utils.data import DataLoader # Easier dataset management and mini-batches
from torch.utils.tensorboard import SummaryWriter

In [12]:
# The Discriminator and Generator classes defined in the following cells could
# instead be kept in a separate Python module and imported here,

# e.g. from model_utils import Discriminator, Generator

In [16]:
class Discriminator(nn.Module):
    """Convolutional discriminator that scores an image as real or fake.

    The layer stack is laid out for 64 x 64 inputs: four stride-2 convolutions
    halve the resolution (64 -> 32 -> 16 -> 8 -> 4) and a final 4 x 4
    convolution collapses the map to one value, squashed by a sigmoid.

    Args:
        channels_img: number of channels of the input images.
        features_d: base channel width; the hidden convolutions output
            features_d, features_d*2, features_d*4 and features_d*8 channels.
    """

    def __init__(self, channels_img, features_d):
        super().__init__()

        # Settings shared by every down-sampling convolution.
        halve = dict(kernel_size=4, stride=2, padding=1)
        slope = 0.2  # negative slope of every LeakyReLU

        # Stage 1 (no BatchNorm):
        # N x channels_img x 64 x 64 -> N x features_d x 32 x 32
        stack = [nn.Conv2d(channels_img, features_d, **halve), nn.LeakyReLU(slope)]

        # Stages 2-4: double the channels, halve the resolution, normalise.
        # BatchNorm together with LeakyReLU helps keep training stable.
        width = features_d
        for _ in range(3):
            stack.append(nn.Conv2d(width, width * 2, **halve))
            stack.append(nn.BatchNorm2d(width * 2))
            stack.append(nn.LeakyReLU(slope))
            width = width * 2

        # Head: N x features_d*8 x 4 x 4 -> N x 1 x 1 x 1, then a probability.
        stack.append(nn.Conv2d(width, 1, kernel_size=4, stride=2, padding=0))
        stack.append(nn.Sigmoid())

        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the sigmoid scores."""
        return self.net(x)

In [17]:
class Generator(nn.Module):
    """Transposed-convolution generator that maps a noise vector to an image.

    A latent tensor of shape N x channels_noise x 1 x 1 is up-sampled through
    five transposed convolutions (1 -> 4 -> 8 -> 16 -> 32 -> 64) and squashed
    by tanh, so the output is N x channels_img x 64 x 64 with values in [-1, 1].

    Args:
        channels_noise: length of the noise vector (channels of the input).
        channels_img: number of channels of the generated image.
        features_g: base channel width; hidden layers use features_g*16,
            features_g*8, features_g*4 and features_g*2 channels.
    """

    def __init__(self, channels_noise, channels_img, features_g):
        super(Generator, self).__init__()
        # NOTE: nn.ReLU's only argument is the boolean ``inplace`` flag, not a
        # channel count. It is set explicitly to True, which is what the
        # previously passed (non-zero) channel counts amounted to.
        self.net = nn.Sequential(
            # N x channels_noise x 1 x 1
            nn.ConvTranspose2d(channels_noise, features_g*16, kernel_size=4, stride=1, padding=0),
            nn.BatchNorm2d(features_g*16),
            nn.ReLU(inplace=True),

            # N x features_g*16 x 4 x 4
            nn.ConvTranspose2d(features_g*16, features_g*8, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(features_g*8),
            nn.ReLU(inplace=True),

            # N x features_g*8 x 8 x 8
            nn.ConvTranspose2d(features_g*8, features_g*4, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(features_g*4),
            nn.ReLU(inplace=True),

            # N x features_g*4 x 16 x 16
            nn.ConvTranspose2d(features_g*4, features_g*2, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(features_g*2),
            nn.ReLU(inplace=True),

            # N x features_g*2 x 32 x 32
            nn.ConvTranspose2d(features_g*2, channels_img, kernel_size=4, stride=2, padding=1),
            # N x channels_img x 64 x 64
            nn.Tanh()
        )

    def forward(self, x):
        """Generate images from the latent batch ``x`` (N x channels_noise x 1 x 1)."""
        return self.net(x)

In [18]:
# Hyperparameters
seed = 42  # fixed RNG seed so weight init, data shuffling and noise are repeatable
torch.manual_seed(seed)

lr = 0.0002
batch_size = 64
image_size = 64  # MNIST digits are 28x28; they are resized to the 64x64 the networks expect
channel_img = 1  # MNIST images are single-channel (grayscale)
channel_noise = 256  # length of the generator's latent vector
num_epochs = 10

features_d = 16  # base channel width of the discriminator
features_g = 16  # base channel width of the generator

# Resize, convert to a tensor in [0, 1], then shift/scale with mean 0.5 and
# std 0.5 so pixel values lie in [-1, 1], matching the generator's tanh output.
my_transforms = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
]
)

# Downloads MNIST into ./dataset/ on first use.
dataset = datasets.MNIST(root= "dataset/", train = True, transform= my_transforms, download = True)
dataloader = DataLoader(dataset, batch_size= batch_size, shuffle = True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create discriminator and generator

netD = Discriminator(channel_img, features_d).to(device)
netG = Generator(channel_noise, channel_img, features_g).to(device)

# Optimizers for G and D (Adam with beta1 = 0.5 rather than the default 0.9)

optimizerD = optim.Adam(netD.parameters(), lr =lr, betas = (0.5, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr =lr, betas = (0.5, 0.999))

# Put both networks in training mode (BatchNorm uses per-batch statistics).
netG.train()
netD.train()

# Loss function: binary cross-entropy on the discriminator's sigmoid output
criterion = nn.BCELoss()

In [19]:
# Nominal class ids for the discriminator's two classes: real -> 1, fake -> 0.
real_label, fake_label = 1, 0

In [20]:
# A constant batch of 64 latent vectors, sampled once and moved to the training
# device, so the same inputs can be fed to the generator at every logging point.
_fixed_noise_shape = (64, channel_noise, 1, 1)
fixed_noise = torch.randn(_fixed_noise_shape).to(device)

In [21]:
# Separate TensorBoard runs for the real and the generated image grids.
writer_real = SummaryWriter(log_dir='runs/DC-GAN/test_real')
writer_fake = SummaryWriter(log_dir='runs/DC-GAN/test_fake')

In [24]:
print("Starting training")

# TensorBoard step counter: incremented on every image log so each grid gets
# its own position on the slider instead of all being written to the same step.
step = 0

for epoch in range(num_epochs):
    for batch_idx, (data, _) in enumerate(dataloader):
        data = data.to(device)
        # Size of *this* batch. The DataLoader is built without drop_last, so the
        # last batch of an epoch can be smaller than the configured batch size.
        # A separate name is used so the global `batch_size` hyperparameter is
        # not overwritten by the loop.
        cur_batch_size = data.shape[0]

        ## Training Discriminator: max log(D(x)) + log(1 - D(G(z)))

        netD.zero_grad()

        # Real images, with one-sided smoothing of the target (0.9 instead of 1).
        real_target = torch.full((cur_batch_size,), 0.9, device=device)
        output = netD(data).reshape(-1)
        lossD_real = criterion(output, real_target)
        D_x = output.mean().item()  # mean D(x) on real data, for logging only

        # Fake images: draw exactly as many noise vectors as there are real
        # samples so the discriminator output and its target have equal length.
        noise = torch.randn(cur_batch_size, channel_noise, 1, 1, device=device)
        fake_target = torch.full((cur_batch_size,), 0.1, device=device)
        fake = netG(noise)

        # .detach() keeps the discriminator update from back-propagating into G.
        output = netD(fake.detach()).reshape(-1)
        lossD_fake = criterion(output, fake_target)

        lossD = lossD_fake + lossD_real

        lossD.backward()
        optimizerD.step()

        # Training the Generator: max log(D(G(z)))

        netG.zero_grad()
        gen_target = torch.ones(cur_batch_size, device=device)
        # Re-score the same fakes with the just-updated discriminator, this time
        # keeping the graph so gradients reach the generator.
        output = netD(fake).reshape(-1)
        lossG = criterion(output, gen_target)
        lossG.backward()
        optimizerG.step()

        if batch_idx % 100 == 0:
            print(f"Epoch [{epoch}/{num_epochs}] Batch {batch_idx}/{len(dataloader)} Loss D: {lossD:.4f},\
             Loss G: {lossG:.4f} D(x): {D_x:.4f}")

            # Log a grid of real images and a grid generated from the fixed
            # noise batch; no gradients are needed for visualisation.
            with torch.no_grad():
                fake_fixed = netG(fixed_noise)

                img_grid_real = torchvision.utils.make_grid(data[:32], normalize = True)
                img_grid_fake = torchvision.utils.make_grid(fake_fixed[:32], normalize = True)
                writer_real.add_image("MNIST Real", img_grid_real, global_step = step)
                writer_fake.add_image("MNIST Fake", img_grid_fake, global_step = step)

            step += 1

Starting training
Epoch [0/10] Batch 0/938 Loss D: 1.4244,             Loss G: 0.9433 D(x): 0.4427
Epoch [0/10] Batch 100/938 Loss D: 0.6684,             Loss G: 2.9035 D(x): 0.8783
Epoch [0/10] Batch 200/938 Loss D: 0.6648,             Loss G: 2.5056 D(x): 0.8716
Epoch [0/10] Batch 300/938 Loss D: 0.7007,             Loss G: 2.1429 D(x): 0.8183


KeyboardInterrupt: 