In [1]:
import os 
import argparse
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader
from torch.utils import 
from torchvision import models, datasets, transforms
import torch.functional as F

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
root = "C:\\Users\\shant\\celeba"
LEARNING_RATE = 0.00001
BATCH_SIZE = 64
Z_DIM = 100
FEATURES_DISC = 64
FEATURES_GEN = 64
IMAGE_CHANNELS = 3
NUM_EPOCHS = 5
IMAGE_SIZE = 64
CRITIC_ITERATIONS = 5
LAMBDA_GP = 10

dataset = datasets.ImageFolder(root = root , transform=transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.CenterCrop(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
]))

dataloader = DataLoader(dataset, batch_size = BATCH_SIZE, shuffle = True, num_workers = 2)

In [4]:
# Implementation of the Wasserstein GAN
# WGAN Has better stability and loss means something for WGAN: Its a termination criteria
# WGAN Also prevents Mode Collapse(Model only outputs specific classes)
# When Discriminator converged to 0 obtained great results

class Generator(nn.Sequential):
    """
    z_dim: 
    channels_img: Input channels(for example for an RGB image this value is 3)
    features_g: Size of the output feature map(In this case its 64x64)
    """
    def __init__(self, z_dim, channels_img, features_g):
        
        modules = [self._block(z_dim, features_g*16, 4, 1, 0),
                   self._block(features_g*16, features_g*8, 4, 2, 1),
                   self._block(features_g*8, features_g*4, 4, 2, 1),
                   self._block(features_g*4, features_g*2, 4, 2, 1),
                   nn.ConvTranspose2d(features_g*2, channels_img, 4, 2, 1),
                   nn.Tanh()]
        
        super(Generator, self).__init__(*modules)
    
    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        
        return nn.Sequential(
            nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride, padding, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
    
class Critic(nn.Sequential):
    
    def __init__(self, channels_img, features_d):
        
        modules = [nn.Conv2d(channels_img, features_d, kernel_size = 4, stride = 2, padding = 1), #32x32
                   nn.LeakyReLU(0.2, inplace=True),
                   self._block(features_d, features_d*2, 4, 2, 1),# 16x16
                   self._block(features_d*2, features_d*4, 4, 2, 1), #8x8
                   self._block(features_d*4, features_d*8, 4, 2, 1), #4x4
                   nn.Conv2d(features_d*8, 1, kernel_size = 4, stride = 2, padding = 0)]
        
        super(Critic, self).__init__(*modules)
    
    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        
        return nn.Sequential(
         nn.Conv2d(in_channels, out_channels,kernel_size, stride, padding, bias=False),
            nn.InstanceNorm2d(out_channels, affine = True), # Learnable Parameters
            nn.LeakyReLU(0.2, inplace=True)
        )

def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    if classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1, 0.02)
        nn.init.constant_(m.bias.data, 0)

def gradient_penalty(critic, real, fake, device = "cpu"):
    batch_size, C, H, W = real.shape
    # Creating interpolated images
    epsilon = torch.randn([batch_size, 1, 1, 1]).repeat(1,C,H,W).to(device)
    interpolated_images = epsilon*real + (1-epsilon) * fake

    #calculate critic scores
    mixed_scores = critic(interpolated_images)

    # Compute the gradients with respect to the interpolated images, just need the first value
    gradient = torch.autograd.grad(
        inputs = interpolated_images, 
        outputs = mixed_scores, 
        grad_outputs = torch.ones_like(mixed_scores),
        create_graph=True,
        retain_graph=True)[0]

    # Number of Dimension
    # Calculate the Norm of the gradient to Eforce 1-Lipschitz Constraint
    gradient = gradient.view(gradient.size(0), -1)
    gradient_norm = gradient.norm(2, dim = 1)
    gradient_penalty = torch.mean((gradient_norm - 1)**2)

    return gradient_penalty

# def test():
#     N, in_channels, H, W = 8, 3, 64, 64
#     z_dim = 100
#     X = torch.randn((N, in_channels, H, W))
#     disc = Critic(in_channels,8)
#     disc.apply(weights_init)
#     assert disc(X).shape == (N, 1, 1, 1) # One Value per example
#     gen = Generator(z_dim, in_channels, 64)
#     gen.apply(weights_init)
#     z = torch.randn((N, z_dim, 1, 1))
#     assert gen(z).shape == (N, in_channels, H, W) # Ouput Generated image
#     print("Success")
# test()

In [5]:
########################
# Generator and Discriminator Model objects
########################
generator = Generator(Z_DIM,IMAGE_CHANNELS,FEATURES_GEN).to(device)
critic = Critic(IMAGE_CHANNELS,FEATURES_DISC).to(device)

########################
# Weight Initialization for the model
########################
generator.apply(weights_init)
critic.apply(weights_init)

########################
# Optimizers for Critic and the Generator
########################
optimizer_gen = optim.Adam(generator.parameters(), lr = LEARNING_RATE, betas = (0,0.9))
optimizer_critic = optim.Adam(critic.parameters(), lr = LEARNING_RATE, betas = (0,0.9))

#######################
# Create tensorboard SummaryWriter objects to display generated fake images and associated loss curves
#######################
writer_real = SummaryWriter(f"logs/real")
writer_fake = SummaryWriter(f"logs/fake")
loss_curves = SummaryWriter(f"logs/loss_curves")

#######################
# Create a batch of latent vectors. Will be used to to do a single pass through the generator after 
# the training has terminated
#######################
fixed_noise = torch.randn((64, Z_DIM, 1, 1)).to(device)

step = 0 # For printing to tens

NameError: name 'SummaryWriter' is not defined