In [None]:
import argparse
import os
import numpy as np
import math
import matplotlib.pyplot as plt

import torchvision.transforms as transforms
from torchvision import transforms
from torchvision.utils import save_image
import torchvision

from torch.utils.data import DataLoader
from torchvision import datasets
from torch.autograd import Variable

import torch.nn as nn
import torch.nn.functional as F
import torch

from pytorch_pretrained_biggan import (BigGAN, one_hot_from_names, truncated_noise_sample,
                                       save_as_images, display_in_terminal)

import pylab
import sys

sys.path.insert(0,'gan-vae-pretrained-pytorch/cifar100_dcgan_grayscale/') # So we can import point_density_functions from parent directory
from dcgan import Discriminator, Generator

%load_ext autoreload
%autoreload 2

In [None]:
# Functions for optimizing the gradient
def grad_loop(im,x,generator,optimizer,scheduler=None,n_epochs=10000,epsilon = 1):
    '''
    Optimize a latent tensor so that generator(x) approaches the target image im.

    Every iteration clamps x to [-5, 5], runs it through generator and takes one
    optimizer step on the summed squared error divided by im.size(0)*im.size(1).

    Args:
        im: target image tensor; must live on the same device as the generator output.
        x: latent tensor fed to the generator (the tensor registered with optimizer,
            or a view of it).
        generator: callable mapping a latent tensor to an image tensor.
        optimizer: torch optimizer that holds the latent tensor.
        scheduler: optional learning-rate scheduler, stepped once per iteration.
        n_epochs: maximum number of optimization steps.
        epsilon: stop as soon as the loss drops below this value.

    Returns:
        (loss_list, im_g, x): the losses logged every 100 steps, the image produced in
        the last executed iteration (computed before that iteration's update; None if
        no iteration ran), and the latent tensor that was passed in.
    '''
    loss_list = []
    # Bound up front so that n_epochs <= 0 returns cleanly instead of raising
    # UnboundLocalError at the return statement.
    im_g = None
    norm = im.size(0)*im.size(1)

    for epoch in range(n_epochs):

        optimizer.zero_grad()

        im_g = generator(torch.clamp(x,-5,5))

        loss = F.mse_loss(im_g,im,reduction = 'sum') / norm
        # retain_graph is kept from the original: callers in this notebook pass x as an
        # unsqueezed view of the optimized leaf, so those view nodes are shared by
        # every iteration's graph.
        loss.backward(retain_graph=True)
        optimizer.step()
        if scheduler is not None:
            scheduler.step()

        # Read the scalar once; it is used for logging and for the stopping test.
        loss_value = loss.item()
        if epoch % 100 == 0:
            loss_list.append(loss_value)
            print("Epoch {}\t Loss: {:2.4f}".format(epoch,loss_value))
        if loss_value < epsilon:
            break
    return loss_list, im_g, x

In [None]:
def convergence_loop(op_list,lr_list,generator,x_keep,im,n_epochs=10000):
    '''
    Compare optimizers and learning rates on the grad_loop latent search.

    For every (optimizer class, learning rate) pair a fresh copy of x_keep is
    optimized with grad_loop, and the logged loss curves are plotted together.

    Args:
        op_list: optimizer classes, called as op([x], lr=lr).
        lr_list: learning rates to try with each optimizer.
        generator: callable passed through to grad_loop.
        x_keep: initial latent tensor; it is copied, never modified.
        im: target image tensor.
        n_epochs: maximum number of steps per run.

    Returns:
        dict mapping "<optimizer name>_<lr>" to the loss list returned by grad_loop.
    '''
    # Optimizer comparison loop
    loss_dict = {}
    for op in op_list:
        # Label runs with the class name; slicing str(op) truncated longer names
        # (AdamW became "damW'>").
        op_name = getattr(op, '__name__', str(op))
        for lr in lr_list:
            # detach() first: cloning a tensor that is part of an autograd graph gives a
            # non-leaf tensor, which torch optimizers refuse to optimize.
            x = x_keep.detach().clone().requires_grad_()
            optimizer = op([x],lr=lr)
            dict_key = op_name+'_'+str(lr)
            loss_list,_,_ = grad_loop(im,x,generator,optimizer,n_epochs=n_epochs)
            loss_dict[dict_key] = loss_list
            print("Opt: {}\tLR: {} Done".format(op,lr))
    plt.figure(figsize=[12,12])
    for k, losses in loss_dict.items():
        plt.plot(losses,label=str(k))
    plt.legend(loc='upper right',fontsize=15)
    return loss_dict

def plot_loss_curve(loss_dict,title,keys=None):
    """
    Plot the learning curves stored in loss_dict on a single figure.

    Args:
        loss_dict: dict mapping a run label (optimizer name and LR) to a list of losses.
            Entries are plotted 100 epochs apart, matching the logging interval of grad_loop.
        title: figure title.
        keys: optional iterable of labels to plot; all entries are plotted when None.
    """
    # Identity test: `== None` is evaluated element-wise if keys is a numpy array.
    if keys is None:
        keys = loss_dict.keys()
    plt.figure(figsize=[15,9])
    for k in keys:
        plt.plot(np.arange(0,100*len(loss_dict[k]),100),loss_dict[k],label=str(k))
    plt.legend(loc='upper right',fontsize=15)
    plt.ylabel("MSE Loss per pixel",fontsize=12)
    plt.xlabel("Epoch",fontsize=12)
    plt.title(title,fontsize=12)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    
# im is the target
def plot_img(im,grayscale=True):
    """
    Display the first image of the batch tensor im with matplotlib.

    A tensor containing negative values is rescaled with im*0.5 + 0.5 before plotting.
    With grayscale=True channel 0 is shown with the 'gray' colormap; otherwise the
    image is permuted from channels-first to channels-last and shown in colour.
    """
    shown = im*0.5 + 0.5 if im.min() < 0 else im
    if not grayscale:
        plt.imshow(shown[0,:,:,:].detach().to('cpu').permute(1,2,0))
        return
    plt.imshow(shown.detach().to('cpu')[0,0],cmap='gray')

## DCGAN Grayscale/CIFAR100

In [None]:
os.makedirs("images", exist_ok=True)

class Args(object):
    """Plain attribute container holding the settings for the CIFAR100 DCGAN cells."""
    def __init__(self):
        self.n_epochs = 200
        # The original assigned 64 here and overwrote it with 32 further down;
        # only the effective value is kept.
        self.batch_size = 32
        self.lr = .002
        self.b1 = 0.5
        self.b2 = 0.999
        self.n_cpu = 8
        self.latent_dim = 100       # length of the latent vector fed to the generator
        self.img_size = 32
        self.channels = 3
        self.sample_interval = 400
        self.dataroot = 'cifar10'   # root directory handed to the torchvision dataset
        self.image_size = 32        # size passed to transforms.Resize
        self.grayscale = True       # selects the 1-channel generator and grayscale plotting

opt=Args()

# Device flags: whether CUDA is usable, the matching float tensor constructor, and the GPU count (0 or 1).
cuda = bool(torch.cuda.is_available())
Tensor = torch.FloatTensor if not cuda else torch.cuda.FloatTensor
num_gpu = int(torch.cuda.is_available())

In [None]:
# load the models
from dcgan import Generator

# The grayscale and RGB checkpoints live in the same folder and differ only in
# channel count and file name.
weights_dir = 'gan-vae-pretrained-pytorch/cifar100_dcgan_grayscale/models/'
if opt.grayscale:
    gen_channels, weights_file = 1, 'netG_epoch_999.pth'
else:
    gen_channels, weights_file = 3, 'netG_rgb_epoch_999.pth'

generator = Generator(ngpu=1,nc=gen_channels).eval()
# load weights; map_location='cpu' keeps a GPU-saved checkpoint loadable on a
# CPU-only machine (the model is moved to the GPU below when one is available)
generator.load_state_dict(torch.load(weights_dir + weights_file, map_location='cpu'))

if torch.cuda.is_available():
    generator = generator.cuda()

# Freeze the model
for param in generator.parameters():
    param.requires_grad = False

### Test 1: Find im that is in the range of the generator

In [None]:
# Initialize z, the target latent vector 
z = Variable(torch.randn(1, opt.latent_dim))
# Initialize x, the initial latent vector
x_keep = Variable(Tensor(np.random.normal(0, 1, (1, opt.latent_dim))),requires_grad=False)

# Generate im, the target image.
# x_keep is built with Tensor, so it sits on the GPU when one is available and on the
# CPU otherwise; using its device instead of a hard-coded 'cuda' keeps this cell
# runnable on a CPU-only machine.
im = generator(z.unsqueeze(2).unsqueeze(2).to(x_keep.device))
plot_img(im,opt.grayscale)

In [None]:
# Generate im_first, the initial guess image
with torch.no_grad():
    im_first = generator(x_keep.unsqueeze(2).unsqueeze(2).to('cuda'))
    plot_img(im_first,opt.grayscale)

In [None]:
# after optimizing (im_g is produced by the grad_loop cell under "Optimize once with grad loop")
with torch.no_grad():
    # Use the configured colour mode like every other plot_img call in this section;
    # the hard-coded False sent a 1-channel image through the RGB branch.
    plot_img(im_g,opt.grayscale)

#### Optimize once with grad loop

In [None]:
# Optimize once, given optimizer parameters
x = x_keep.clone().requires_grad_()

In [None]:
optimizer = torch.optim.SGD([x],lr=0.01)
loss_list, im_g,x_out = grad_loop(im,x.unsqueeze(2).unsqueeze(2).to('cuda'),
                            generator,optimizer,scheduler=None,n_epochs=100000,epsilon = .01)

In [None]:
# Latent vector norm
print("Latent norm initial: ",torch.norm(x_keep-z.to('cuda')))
print("Latent norm final: ",torch.norm(x_out[:,:,0,0]-z.to('cuda')))

#### Plot loss curves for different optimizers

In [None]:
op_list = [torch.optim.SGD,torch.optim.Adam]
lr_list = [1e-5,1e-4,1e-3,1e-2]
# Start every run from the untouched initial latent x_keep. x has already been updated
# in place by the grad_loop cell above, and a view of a tensor that requires grad is
# cloned into a non-leaf tensor, which the optimizers reject.
loss_dict = convergence_loop(op_list,lr_list,generator,x_keep.unsqueeze(2).unsqueeze(2),im)

In [None]:
plot_loss_curve(loss_dict,title='Learning curves for DCGAN on image in training set')

In [None]:
# Compare loss across optimizers at a given logged step (index 5 = epoch 500)
log_idx = 5
for k, losses in loss_dict.items():
    # A run that stopped early (loss below epsilon) logs fewer values; fall back to its last one.
    print(k,": ",losses[min(log_idx,len(losses)-1)])

### Test 2: Find im that does not come from the generator (a dataset image)

In [None]:
# Load the data

# Both colour modes share the same pipeline; only the optional grayscale conversion and
# the number of normalization constants differ.
norm_channels = 1 if opt.grayscale else 3
transform_steps = [transforms.Grayscale()] if opt.grayscale else []
transform_steps += [
    transforms.Resize(opt.image_size),
    transforms.ToTensor(),
    # One mean/std per channel. The original grayscale branch passed (0.5), which is a
    # plain float rather than a 1-tuple.
    transforms.Normalize((0.5,)*norm_channels, (0.5,)*norm_channels),
]
dataset = datasets.CIFAR100(root=opt.dataroot, download=True,
                   transform=transforms.Compose(transform_steps))
assert dataset
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1,
                                         shuffle=True, num_workers=1)

In [None]:
# Initialize x, the initial latent vector
x_keep = Variable(Tensor(np.random.normal(0, 1, (1, opt.latent_dim))),requires_grad=False)

# Generate im, the target image
im,label = next(iter(dataloader))
im = im.to('cuda')
plot_img(im,opt.grayscale)

In [None]:
# Generate im_first, the initial guess image
with torch.no_grad():
    im_first = generator(x_keep.unsqueeze(2).unsqueeze(2).to('cuda'))
    plot_img(im_first,opt.grayscale)

In [None]:
# after optimizing
with torch.no_grad():
    plot_img(im_g,opt.grayscale)

#### Plot loss curves for different optimizers

In [None]:
op_list = [torch.optim.SGD,torch.optim.Adam,torch.optim.AdamW,torch.optim.ASGD]
lr_list = [1e-4,1e-3,1e-2]
# Start from this section's initial latent x_keep. At this point x is still the latent
# optimized in Test 1 (this section's x is only created further down), and being a
# grad-requiring tensor its view cannot be turned into an optimizable leaf by clone().
loss_dict = convergence_loop(op_list,lr_list,generator,x_keep.unsqueeze(2).unsqueeze(2),im,n_epochs=100)

In [None]:
plot_loss_curve(loss_dict,title='Learning curves for DCGAN on image in training set')

In [None]:
# Compare loss across optimizers at the last logged step
for k, losses in loss_dict.items():
    # grad_loop logs once every 100 epochs, so the 100-epoch runs above hold a single
    # entry each; indexing position 5 raised IndexError.
    print(k,": {:2.4f}".format(losses[-1]))

#### Optimize once with grad loop

In [None]:
# Optimize once, given optimizer parameters
x = x_keep.clone().requires_grad_()
optimizer = torch.optim.SGD([x],lr=0.01)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5000,10000,20000,40000], gamma=0.5)

In [None]:
loss_list, im_g, x_out = grad_loop(im,x.unsqueeze(2).unsqueeze(2).to('cuda'),
                            generator,optimizer,scheduler=scheduler,n_epochs=50000,epsilon = .01)

In [None]:
plot_img(im_g,opt.grayscale)

## DCGAN MNIST

In [None]:
def weights_init_normal(m):
    """
    Initializer intended for nn.Module.apply.

    Modules whose class name contains "Conv" get weights drawn from N(0, 0.02);
    modules whose class name contains "BatchNorm2d" get weights from N(1, 0.02)
    and zero bias. Every other module is left untouched.
    """
    layer_type = type(m).__name__
    if "Conv" in layer_type:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
        return
    if "BatchNorm2d" in layer_type:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)


class Generator(nn.Module):
    """
    DCGAN generator: a linear projection of the latent vector followed by two
    upsample+conv stages and a final conv with Tanh.

    Sizes are read from the global `opt` at construction time (opt.img_size,
    opt.latent_dim, opt.channels). This definition shadows the Generator imported
    from the `dcgan` module at the top of the notebook.
    """
    def __init__(self):
        super(Generator, self).__init__()

        # Two Upsample(scale_factor=2) layers follow, so the feature map starts at a quarter of the image size.
        self.init_size = opt.img_size // 4
        self.l1 = nn.Sequential(nn.Linear(opt.latent_dim, 128 * self.init_size ** 2))

        # NOTE(review): in BatchNorm2d(128, 0.8) the 0.8 is passed positionally, which
        # nn.BatchNorm2d takes as eps rather than momentum — confirm the intent. Left
        # as is, since checkpoints loaded elsewhere in this notebook were trained with it.
        self.conv_blocks = nn.Sequential(
            nn.BatchNorm2d(128),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(128, 128, 3, stride=1, padding=1),
            nn.BatchNorm2d(128, 0.8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(128, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64, 0.8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, opt.channels, 3, stride=1, padding=1),
            nn.Tanh(),
        )

    def forward(self, z):
        """Map latent vectors z (one row per sample) to images; Tanh bounds the output to [-1, 1]."""
        out = self.l1(z)
        # Reshape the flat projection into a 128-channel init_size x init_size feature map.
        out = out.view(out.shape[0], 128, self.init_size, self.init_size)
        img = self.conv_blocks(out)
        return img


class Discriminator(nn.Module):
    """
    DCGAN discriminator: four stride-2 conv blocks followed by a linear layer with
    Sigmoid that outputs one validity score per image.

    Sizes are read from the global `opt` at construction time (opt.channels,
    opt.img_size). This definition shadows the Discriminator imported from the
    `dcgan` module at the top of the notebook.
    """
    def __init__(self):
        super(Discriminator, self).__init__()

        def discriminator_block(in_filters, out_filters, bn=True):
            # Conv (kernel 3, stride 2, padding 1) -> LeakyReLU -> Dropout2d, optionally followed by BatchNorm.
            # NOTE(review): the 0.8 in BatchNorm2d(out_filters, 0.8) is positional, i.e. eps, not momentum — confirm.
            block = [nn.Conv2d(in_filters, out_filters, 3, 2, 1), nn.LeakyReLU(0.2, inplace=True), nn.Dropout2d(0.25)]
            if bn:
                block.append(nn.BatchNorm2d(out_filters, 0.8))
            return block

        self.model = nn.Sequential(
            *discriminator_block(opt.channels, 16, bn=False),
            *discriminator_block(16, 32),
            *discriminator_block(32, 64),
            *discriminator_block(64, 128),
        )

        # The height and width of downsampled image
        # (four stride-2 convolutions, hence the division by 2 ** 4)
        ds_size = opt.img_size // 2 ** 4
        self.adv_layer = nn.Sequential(nn.Linear(128 * ds_size ** 2, 1), nn.Sigmoid())

    def forward(self, img):
        """Return one Sigmoid score per image in the batch."""
        out = self.model(img)
        # Flatten the conv features per sample for the linear layer.
        out = out.view(out.shape[0], -1)
        validity = self.adv_layer(out)

        return validity

In [None]:
os.makedirs("images", exist_ok=True)

class Args(object):
    """Plain attribute container holding the settings for the DCGAN defined in this notebook."""
    def __init__(self):
        self.n_epochs = 200
        # The original assigned 64 here and overwrote it with 32 further down;
        # only the effective value is kept.
        self.batch_size = 32
        self.lr = .002
        self.b1 = 0.5
        self.b2 = 0.999
        self.n_cpu = 8
        self.latent_dim = 100       # length of the latent vector fed to the generator
        self.img_size = 32
        self.channels = 3
        self.sample_interval = 400

opt=Args()

cuda = True if torch.cuda.is_available() else False
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

### Optimizer Test
Process:
1) Select a target image `im` that lies in the range of the generator, together with the latent vector `z` that produced it.
2) Generate a random initial latent vector `x`.
3) Optimize `x` to minimize $\|G(x) - im\|_2^2$.


In [None]:
cuda = True if torch.cuda.is_available() else False
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

In [None]:
class OptimizerArgs(object):
    """Settings for the manual latent-optimization loop: step budget, learning rate and stopping threshold."""
    def __init__(self):
        self.n_epochs, self.lr, self.epsilon = 2000, .00005, 1e-2
opt2=OptimizerArgs()

In [None]:
# Load generator
MODEL_PATH = "models/dcgan/generator_cifar10.pth"
generator = Generator()
# map_location='cpu' keeps a GPU-saved checkpoint loadable on a CPU-only machine.
generator.load_state_dict(torch.load(MODEL_PATH, map_location='cpu'))
# Move to the GPU only when one is present, matching the `cuda` flag used for Tensor.
# NOTE(review): the model stays in training mode here, unlike the pretrained generator
# above which is put in .eval() — confirm which BatchNorm behaviour is wanted.
if cuda:
    generator.cuda()
generator

In [None]:
# z is the target latent vector; im = G(z) is the target image the optimizer test tries
# to reproduce (step 1 of the process above). Without it the cells below would reuse
# whatever `im` an earlier section left in the kernel.
z = Variable(Tensor(np.random.normal(0, 1, (1, opt.latent_dim))))
with torch.no_grad():
    im = generator(z)

In [None]:
z

In [None]:
# Initialize random vector
x_keep = Variable(Tensor(np.random.normal(0, 1, (1, opt.latent_dim))),requires_grad=False)

# optimizer = torch.optim.SGD([x],lr = opt2.lr)
# scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10000,20000], gamma=0.5)

# Freeze the model
for param in generator.parameters():
    param.requires_grad = False

In [None]:
# Initial guess
# im is the target
with torch.no_grad():
    im_first = generator(x_keep.to('cuda'))
    plot_img(im_first,True)

In [None]:
# after optimizing
with torch.no_grad():
    plot_img(im_g,True)

In [None]:
# Optimize once, given optimizer parameters
x = x_keep.clone().requires_grad_()
optimizer = torch.optim.SGD([x],lr=0.01)
# grad_loop returns (loss_list, im_g, x); unpacking into two names raised ValueError.
loss_list, im_g, x_out = grad_loop(im,x,generator,optimizer,scheduler=None,n_epochs=10000,epsilon = .01)

#### Plot loss curves for different optimizers

In [None]:
op_list = [torch.optim.SGD,torch.optim.Adam]
lr_list = [1e-5,1e-4,1e-3,1e-2]
loss_dict = convergence_loop(op_list,lr_list,generator,x_keep,im)

In [None]:
plot_loss_curve(loss_dict,title='Learning curves for DCGAN on image in training set')

In [None]:
# Compare loss across optimizers at a given logged step (index 5 = epoch 500)
log_idx = 5
for k, losses in loss_dict.items():
    # A run that stopped early (loss below epsilon) logs fewer values; fall back to its last one.
    print(k,": ",losses[min(log_idx,len(losses)-1)])

# BigGAN Test

In [None]:
from pytorch_pretrained_biggan import (BigGAN, one_hot_from_names, truncated_noise_sample,
                                       save_as_images, display_in_terminal)

# Load pre-trained model tokenizer (vocabulary)
model = BigGAN.from_pretrained('biggan-deep-128')

# Prepare a input
truncation = 0.4
class_vector = one_hot_from_names(['koala'], batch_size=1)
z = truncated_noise_sample(truncation=truncation, batch_size=1)

# All in tensors
z = torch.from_numpy(z)
class_vector = torch.from_numpy(class_vector)

# If you have a GPU, put everything on cuda
z = z.to('cuda')
class_vector = class_vector.to('cuda')
model.to('cuda')

# Freeze the model
for param in model.parameters():
    param.requires_grad = False

def bigGAN_wrapper(noise_vector,class_vector=class_vector,model=model):
    """
    Adapt BigGAN to the single-argument generator interface expected by grad_loop.

    Args:
        noise_vector: latent tensor passed to the model.
        class_vector: class conditioning tensor; defaults to the one built in this cell.
        model: BigGAN model; defaults to the one loaded in this cell.

    Returns:
        The image tensor produced by model(noise_vector, class_vector, truncation),
        using the global `truncation`.
    """
    # Generate an image. The original also called im.to('cpu') and discarded the result;
    # Tensor.to is not in-place, so the image never left the model's device. The no-op
    # call is dropped and the returned tensor is unchanged.
    return model(noise_vector, class_vector, truncation)

In [None]:
im = bigGAN_wrapper(z)
plt.imshow(im.to('cpu')[0].permute(1,2,0))

In [None]:
# Initialize random vector
x_keep = truncated_noise_sample(truncation=truncation, batch_size=1)
# All in tensors
x_keep = torch.from_numpy(x_keep).to('cuda')

In [None]:
with torch.no_grad():
    plt.imshow(model(x_keep,class_vector,truncation).to('cpu')[0].permute(1,2,0))

In [None]:
with torch.no_grad():
    plt.imshow(im_g.to('cpu')[0].permute(1,2,0))

In [None]:
# Testing out
x = x_keep.clone().requires_grad_()

In [None]:
optimizer = torch.optim.Adam([x],lr=0.01)
loss_list, im_g,x_out = grad_loop(im,x,bigGAN_wrapper,optimizer,n_epochs=100000)

In [None]:
optimizer = torch.optim.AdamW([x],lr=.0001)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[500,5000,50000], gamma=0.5)
loss_list_adam, im_g_adam,x_out = grad_loop(im,x,bigGAN_wrapper,optimizer,scheduler=scheduler,n_epochs=10000)

In [None]:
# Plot the BigGAN learning curves; loss_dict must come from the BigGAN
# convergence_loop run in the "Main test loop!" cell.
plt.figure(figsize=[15,9])
for k in loss_dict.keys():
    # grad_loop logs one value every 100 epochs, so scale the x axis to match the "Epoch" label.
    plt.plot(np.arange(0,100*len(loss_dict[k]),100),loss_dict[k],label=str(k))
plt.legend(loc='upper right',fontsize=15)
plt.ylabel("MSE Loss per pixel",fontsize=12)
plt.xlabel("Epoch",fontsize=12)
# The class vector used throughout this section is 'koala', not "Coffee".
plt.title("Learning curves for BigGAN in \"koala\" class",fontsize=12)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

In [None]:
print("Latent Norm initial: ",torch.norm(z-x_keep))
print("Latent Norm final: ",torch.norm(z-x_out))

In [None]:
# Main test loop! 
op_list = [torch.optim.SGD,torch.optim.Adam]
lr_list = [1e-5,1e-4,1e-3,1e-2]
# The target in this section is the BigGAN-generated image `im`; `real_image` is only
# created later, in the "Test on image not from Generator" section.
loss_dict = convergence_loop(op_list,lr_list,bigGAN_wrapper,x_keep,im)

In [None]:
# grad_loop logs once every 100 epochs, so a 10000-epoch run holds at most 100 values;
# index 999 was always out of range. Report the last logged loss instead.
for k in loss_dict.keys():
    print(k,": ",loss_dict[k][-1])

## Test on image not from Generator

### BigGAN

In [None]:
from PIL import Image
# Kept under its own name so the tensor `im` used by other cells is not replaced by a PIL image.
koala_img = Image.open("koala_bear.jpg").convert("RGB").resize((128,128))
# Scale uint8 pixels from [0, 255] to [-1, 1], the range the later cells assume when they
# apply x*0.5 + 0.5 before plotting. The original /128 produced values in [0, 2).
real_image = (torch.Tensor(np.array(koala_img))/127.5 - 1).unsqueeze(0).permute(0,3,1,2).to('cuda')

In [None]:
class_vector = one_hot_from_names(['koala'], batch_size=1)
class_vector = torch.from_numpy(class_vector)
class_vector = class_vector.to('cuda')

In [None]:
# Initialize random vector
x_keep = truncated_noise_sample(truncation=truncation, batch_size=1)
# All in tensors
x_keep = torch.from_numpy(x_keep).to('cuda')
# Testing out
x = x_keep.clone().requires_grad_()

In [None]:
# Main test loop! 
op_list = [torch.optim.SGD,torch.optim.Adam]
# op_list = [torch.optim.Adam]
lr_list = [1e-4,1e-3,1e-2]
loss_dict = convergence_loop(op_list,lr_list,bigGAN_wrapper,x_keep,real_image)

In [None]:
loss_dict

In [None]:
for k in loss_dict.keys():
    print(k,": ",loss_dict[k][-1])

In [None]:
optimizer = torch.optim.Adam([x],lr=0.01)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[500,5000,8000], gamma=0.5)
loss_list_adam, im_g_adam, x_out = grad_loop(real_image,x,bigGAN_wrapper,optimizer,scheduler=scheduler,n_epochs=5000)

In [None]:
with torch.no_grad():
    im_first = model(x_keep,class_vector,truncation).to('cpu')[0].permute(1,2,0)
    im_first = im_first *0.5+0.5
    plt.imshow(im_first)

In [None]:
# Rescale real_image for display (presumably assumes values in [-1, 1] — TODO confirm
# against the cell that builds real_image).
# NOTE(review): this rebinds real_image, so re-running the cell rescales it again and any
# optimization started afterwards sees the rescaled target — confirm that is intended.
real_image = real_image*0.5 + 0.5
plt.imshow(real_image.to('cpu').detach()[0].permute(1,2,0))

In [None]:
plt.imshow(im_g_adam.to('cpu').detach()[0].permute(1,2,0))

In [None]:
# Shift the image from [-1,1] to [0,1]
im_g = bigGAN_wrapper(x,class_vector=class_vector,model=model)
im_g_norm = im_g/2 + 1/2
# Two imshow calls on the same axes leave only the second image visible; show the
# reconstruction and the target side by side instead.
fig, axes = plt.subplots(1,2,figsize=[10,5])
axes[0].imshow(im_g_norm[0].detach().to('cpu').permute(1,2,0))
axes[0].set_title("Reconstruction")
axes[1].imshow((real_image[0]).detach().to('cpu').permute(1,2,0))
axes[1].set_title("Target")

### DCGAN

#### CIFAR10

In [None]:
os.makedirs("data/mnist", exist_ok=True)
dataloader = torch.utils.data.DataLoader(
    datasets.CIFAR10(
        "data/cifar10",
        train=True,
        download=True,
        transform=transforms.Compose(
            [transforms.Resize(opt.img_size), transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]
        ),
    ),
    batch_size=opt.batch_size,
    shuffle=True,
)

#### MNIST

In [None]:
# Configure data loader
os.makedirs("data/mnist", exist_ok=True)
dataloader = torch.utils.data.DataLoader(
    datasets.MNIST(
        "data/mnist",
        train=True,
        download=True,
        transform=transforms.Compose(
            [transforms.Resize(opt.img_size), transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]
        ),
    ),
    batch_size=1,
    shuffle=True,
)

#### Test

In [None]:
img_new,_ = next(iter(dataloader))

In [None]:
# Initialize random vector
x_keep = Variable(Tensor(np.random.normal(0, 1, (1, opt.latent_dim))))
x = x_keep.clone().requires_grad_()

# Freeze the model
for param in generator.parameters():
    param.requires_grad = False

optimizer = torch.optim.SGD([x],lr = opt2.lr)
# scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10000,20000], gamma=0.5)

In [None]:
# Repeat the latent search from 100 random starting points, keeping each loss curve and final image.
loss_list = []
im_list = []
# x_list = []
target = img_new.to('cuda')  # moved once instead of on every restart
for i in range(100):
    x_keep = Variable(Tensor(np.random.normal(0, 1, (1, opt.latent_dim))))
    x = x_keep.clone().requires_grad_()

    optimizer = torch.optim.SGD([x],lr=0.0001)
    # NOTE(review): this scheduler is created but grad_loop is called with scheduler=None,
    # so it never steps here — confirm whether it was meant to be passed in.
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[500,5000,8000], gamma=0.5)
    # grad_loop returns (loss_list, im_g, x); unpacking into two names raised ValueError.
    ll, im_g, _ = grad_loop(target,x,generator,optimizer,scheduler=None,n_epochs=30000)
    loss_list.append(ll)
    # Store a detached image so 100 autograd graphs are not kept alive.
    im_list.append(im_g.detach())
#     x_list.append(x_g)

In [None]:
op_list = [torch.optim.SGD,torch.optim.Adam]
lr_list = [1e-5,1e-4,1e-3,1e-2]
loss_dict = convergence_loop(op_list,lr_list,generator,x_keep,img_new.to('cuda'))

In [None]:
plot_loss_curve(loss_dict,title='Learning curves for DCGAN on image NOT in training set')

In [None]:
z.min()

In [None]:
plot_img(im_g,True)

In [None]:
# Manual version of grad_loop against img_new, using the global x, optimizer and scheduler.
# img_new comes straight from the DataLoader (CPU); move it next to the latent so the loss
# does not mix devices.
target = img_new.to(x.device)
# Normalize by the target's own batch*channel count. The original read `im`, which is
# unrelated to img_new and by this point has been rebound to a PIL image (whose .size is
# a tuple, not a method).
norm = target.size(0)*target.size(1)
for epoch in range(opt2.n_epochs):
    optimizer.zero_grad()
    
    im_g = generator(x)
    loss = F.mse_loss(im_g,target,reduction = 'sum') / norm
    # x is a leaf tensor and the graph is rebuilt every iteration, so nothing has to be retained.
    loss.backward()
    optimizer.step()
    scheduler.step()
    loss_value = loss.item()
    if epoch % 100 == 0:
        print("Epoch {}\tMSE Loss: {:2.4f}".format(epoch,loss_value))
    if loss_value < opt2.epsilon:
        break

In [None]:
plt.imshow(img_new.detach().to('cpu')[0,0])

In [None]:
plt.imshow(im_g.detach().to('cpu')[0,0])

## Training DCGAN

In [None]:
def weights_init_normal(m):
    """
    Initializer intended for nn.Module.apply.

    Modules whose class name contains "Conv" get weights drawn from N(0, 0.02);
    modules whose class name contains "BatchNorm2d" get weights from N(1, 0.02)
    and zero bias. Every other module is left untouched.
    """
    layer_type = type(m).__name__
    if "Conv" in layer_type:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
        return
    if "BatchNorm2d" in layer_type:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)


class Generator(nn.Module):
    """
    DCGAN generator: a linear projection of the latent vector followed by two
    upsample+conv stages and a final conv with Tanh.

    Sizes are read from the global `opt` at construction time (opt.img_size,
    opt.latent_dim, opt.channels). The same class is also defined earlier in this
    notebook; whichever cell ran last is the one in effect.
    """
    def __init__(self):
        super(Generator, self).__init__()

        # Two Upsample(scale_factor=2) layers follow, so the feature map starts at a quarter of the image size.
        self.init_size = opt.img_size // 4
        self.l1 = nn.Sequential(nn.Linear(opt.latent_dim, 128 * self.init_size ** 2))

        # NOTE(review): in BatchNorm2d(128, 0.8) the 0.8 is passed positionally, which
        # nn.BatchNorm2d takes as eps rather than momentum — confirm the intent before
        # changing it, since saved checkpoints depend on the current behaviour.
        self.conv_blocks = nn.Sequential(
            nn.BatchNorm2d(128),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(128, 128, 3, stride=1, padding=1),
            nn.BatchNorm2d(128, 0.8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(128, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64, 0.8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, opt.channels, 3, stride=1, padding=1),
            nn.Tanh(),
        )

    def forward(self, z):
        """Map latent vectors z (one row per sample) to images; Tanh bounds the output to [-1, 1]."""
        out = self.l1(z)
        # Reshape the flat projection into a 128-channel init_size x init_size feature map.
        out = out.view(out.shape[0], 128, self.init_size, self.init_size)
        img = self.conv_blocks(out)
        return img


class Discriminator(nn.Module):
    """
    DCGAN discriminator: four stride-2 conv blocks followed by a linear layer with
    Sigmoid that outputs one validity score per image.

    Sizes are read from the global `opt` at construction time (opt.channels,
    opt.img_size). The same class is also defined earlier in this notebook;
    whichever cell ran last is the one in effect.
    """
    def __init__(self):
        super(Discriminator, self).__init__()

        def discriminator_block(in_filters, out_filters, bn=True):
            # Conv (kernel 3, stride 2, padding 1) -> LeakyReLU -> Dropout2d, optionally followed by BatchNorm.
            # NOTE(review): the 0.8 in BatchNorm2d(out_filters, 0.8) is positional, i.e. eps, not momentum — confirm.
            block = [nn.Conv2d(in_filters, out_filters, 3, 2, 1), nn.LeakyReLU(0.2, inplace=True), nn.Dropout2d(0.25)]
            if bn:
                block.append(nn.BatchNorm2d(out_filters, 0.8))
            return block

        self.model = nn.Sequential(
            *discriminator_block(opt.channels, 16, bn=False),
            *discriminator_block(16, 32),
            *discriminator_block(32, 64),
            *discriminator_block(64, 128),
        )

        # The height and width of downsampled image
        # (four stride-2 convolutions, hence the division by 2 ** 4)
        ds_size = opt.img_size // 2 ** 4
        self.adv_layer = nn.Sequential(nn.Linear(128 * ds_size ** 2, 1), nn.Sigmoid())

    def forward(self, img):
        """Return one Sigmoid score per image in the batch."""
        out = self.model(img)
        # Flatten the conv features per sample for the linear layer.
        out = out.view(out.shape[0], -1)
        validity = self.adv_layer(out)

        return validity

In [None]:
# Loss function
adversarial_loss = torch.nn.BCELoss()

# Initialize generator and discriminator
generator = Generator()
discriminator = Discriminator()

if cuda:
    generator.cuda()
    discriminator.cuda()
    adversarial_loss.cuda()

# Initialize weights
generator.apply(weights_init_normal)
discriminator.apply(weights_init_normal)

# Configure data loader (CIFAR10). The directory created here now matches the dataset
# root; the original created "data/mnist", a leftover from the MNIST variant.
os.makedirs("data/cifar10", exist_ok=True)
dataloader = torch.utils.data.DataLoader(
    datasets.CIFAR10(
        "data/cifar10",
        train=True,
        download=True,
        transform=transforms.Compose(
            # One mean/std per RGB channel, spelled out instead of relying on a single
            # value being broadcast across the three channels.
            [transforms.Resize(opt.img_size), transforms.ToTensor(), transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]
        ),
    ),
    batch_size=opt.batch_size,
    shuffle=True,
)

# Optimizers
optimizer_G = torch.optim.Adam(generator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))

Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

In [None]:
# DCGAN training: for every batch, take one generator step followed by one discriminator
# step, print both losses, and save a 5x5 grid of generated samples every
# opt.sample_interval batches.
for epoch in range(opt.n_epochs):
    for i, (imgs, _) in enumerate(dataloader):

        # Adversarial ground truths
        valid = Variable(Tensor(imgs.shape[0], 1).fill_(1.0), requires_grad=False)
        fake = Variable(Tensor(imgs.shape[0], 1).fill_(0.0), requires_grad=False)

        # Configure input
        real_imgs = Variable(imgs.type(Tensor))

        # -----------------
        #  Train Generator
        # -----------------

        optimizer_G.zero_grad()

        # Sample noise as generator input
        z = Variable(Tensor(np.random.normal(0, 1, (imgs.shape[0], opt.latent_dim))))

        # Generate a batch of images
        gen_imgs = generator(z)

        # Loss measures generator's ability to fool the discriminator
        g_loss = adversarial_loss(discriminator(gen_imgs), valid)

        g_loss.backward()
        optimizer_G.step()

        # ---------------------
        #  Train Discriminator
        # ---------------------

        optimizer_D.zero_grad()

        # Measure discriminator's ability to classify real from generated samples
        real_loss = adversarial_loss(discriminator(real_imgs), valid)
        # detach() keeps the discriminator step from backpropagating into the generator
        fake_loss = adversarial_loss(discriminator(gen_imgs.detach()), fake)
        d_loss = (real_loss + fake_loss) / 2

        d_loss.backward()
        optimizer_D.step()

        # Printed for every batch
        print(
            "[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f]"
            % (epoch, opt.n_epochs, i, len(dataloader), d_loss.item(), g_loss.item())
        )

        # Global batch counter across epochs; drives the periodic sample dump into images/
        batches_done = epoch * len(dataloader) + i
        if batches_done % opt.sample_interval == 0:
            save_image(gen_imgs.data[:25], "images/%d.png" % batches_done, nrow=5, normalize=True)


In [None]:
imgs.shape

In [None]:
imgs = imgs*0.5 + 0.5
plt.imshow(imgs[0,:,:,:].detach().to('cpu').permute(1,2,0))

In [None]:
z = Variable(Tensor(np.random.normal(0, 1, (imgs.shape[0], opt.latent_dim))))
im_g = generator(z)
im_g = im_g*0.5 + 0.5
plt.imshow(im_g[0,:,:,:].detach().to('cpu').permute(1,2,0))

In [None]:
z

In [None]:
# Saving model
MODEL_PATH = "models/dcgan/generator_cifar10.pth"
# torch.save does not create missing parent directories, and nothing else in the
# notebook creates models/dcgan.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
torch.save(generator.state_dict(), MODEL_PATH)