In [1]:
import torch
import numpy as np
import torchvision

In [2]:
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset, Dataset

In [3]:
# Root directory passed to torchvision.datasets.MNIST below (download=True).
DATA = "data/"

In [4]:
%%capture
# Open MNIST rooted at DATA, downloading it first if it is not there yet.
# No transform is attached here; the MNISTDS wrapper applies it per item.
mnist_ds = torchvision.datasets.MNIST(root=DATA, download=True)

In [5]:
import torchvision.transforms as transforms

Define all desired image transformations here.

In [6]:
# Per-image preprocessing pipeline; add further transforms to this list.
transform = transforms.Compose([transforms.ToTensor()])

Custom `Dataset` that wraps the raw MNIST dataset and returns `(transformed image, label)` pairs.

In [7]:
class MNISTDS(Dataset):
    """Wrap an indexable dataset of (image, label) items and transform the image.

    Args:
        mnist_ds: any object supporting ``len()`` and integer indexing whose
            items are sequences with the image at position 0 and the label
            at position 1.
        transform: callable applied to the image of every item on access.
    """
    def __init__(self, mnist_ds, transform):
        super().__init__()
        self.mnist_ds = mnist_ds
        self.transform = transform

    def __len__(self):
        """Number of items in the wrapped dataset."""
        return len(self.mnist_ds)

    def __getitem__(self, i):
        """Return ``(transform(image), label)`` for item ``i``."""
        # Index the wrapped dataset exactly once: every lookup may do real
        # work (e.g. building an image object), so fetching the image and the
        # label through two separate lookups would double that cost.
        sample = self.mnist_ds[i]
        return self.transform(sample[0]), sample[1]
    

In [8]:
# Dataset of (transformed image, label) pairs built on top of the raw MNIST data.
ds = MNISTDS(mnist_ds=mnist_ds, transform=transform)

Define the model

In [10]:
import torch.nn as nn

The DCGAN architecture is a bunch of stacked "transposed convolutions". If, like me, you're wondering what the hell a transposed convolution is, see [here](https://github.com/vdumoulin/conv_arithmetic) for some very helpful visualizations. The punchline is: it's an ordinary convolution, but one where the *stride* is used to "inflate" the input image before feeding it to the conv filter, so that the outputs can end up being larger in spatial extent than the inputs.

I'm basically copying the [example pytorch implementation of DCGAN](https://github.com/pytorch/examples/blob/master/dcgan/main.py)

In [60]:
class Generator(nn.Module):
    """DCGAN-style generator: maps a latent vector to a (1, 28, 28) image.

    Four transposed convolutions (kernel 4) grow the spatial extent
    1 -> 4 -> 8 -> 16 -> 28 while the channel count goes
    latent_size -> 4*ngf -> 2*ngf -> ngf -> 1. The final Tanh bounds every
    output pixel to [-1, 1].

    NOTE(review): the `transform` defined earlier in this notebook is a bare
    ToTensor(); if real images reach the discriminator un-normalized, their
    value range may not match this Tanh output -- confirm.
    """
    def __init__(self, latent_size):
        """latent_size = size of the latent space"""
        super(Generator, self).__init__()

        self.latent_size = latent_size

        # kernel size shared by every transposed convolution
        self.kernel_size = 4
        # (proportional to the) number of generator filters
        self.ngf = 64

        # Takes a latent vector and outputs an MNIST-sized image.
        # For a transposed convolution (dilation 1, no output_padding):
        #     out = (in - 1) * stride - 2 * padding + kernel_size
        # input: (_, latent_size, 1, 1) latent vector
        self.upsample = nn.Sequential(
            nn.ConvTranspose2d(self.latent_size, 4 * self.ngf, self.kernel_size,
                               stride=1, padding=0),
            nn.BatchNorm2d(4 * self.ngf),
            nn.ReLU(),
            # spatial extent here is set by the kernel: (4, 4)

            # By setting stride=2 we effectively double the output size (up to
            # fiddling with the boundary conditions). Counter-intuitively,
            # increasing the `padding` arg *shrinks* the output of a transposed
            # convolution; padding=1 is used only to keep the output shapes at
            # nice multiples of two.
            nn.ConvTranspose2d(4 * self.ngf, 2 * self.ngf, self.kernel_size,
                               stride=2, padding=1),
            nn.BatchNorm2d(2 * self.ngf),
            nn.ReLU(),
            # (8, 8)

            nn.ConvTranspose2d(2 * self.ngf, 1 * self.ngf, self.kernel_size,
                               stride=2, padding=1),
            nn.BatchNorm2d(1 * self.ngf),
            nn.ReLU(),
            # (16, 16)

            # padding=3 (instead of 1) trims the doubled output from (32, 32)
            # down to (28, 28) for MNIST: (16 - 1) * 2 - 2 * 3 + 4 = 28
            nn.ConvTranspose2d(self.ngf, 1, self.kernel_size,
                               stride=2, padding=3),
            nn.Tanh(),
            # (28, 28)
        )

    def forward(self, z):
        """Input: (_, latent_size) noise tensor (any shape that views to
                (_, latent_size, 1, 1) is accepted)
            Output: (_, 1, 28, 28) generated image tensor with values in [-1, 1]"""
        z = z.view(-1, self.latent_size, 1, 1)
        return self.upsample(z)

In [114]:
class Discriminator(nn.Module):
    """Convolutional classifier scoring (1, 28, 28) images with one value in (0, 1).

    Roughly the generator stack run in reverse: three stride-2 convolutions
    shrink the spatial extent 28 -> 16 -> 8 -> 4, then a final 4x4
    convolution collapses it to a single value that is squashed by a Sigmoid.
    """
    def __init__(self):
        super().__init__()

        # scaling for the number of filters
        self.nf = 64
        # kernel size used by every convolution
        self.kernel_size = 4

        width, k = self.nf, self.kernel_size

        # input (1, 28, 28); padding=3 on the first layer gives a 16x16 map
        layers = [
            nn.Conv2d(1, width, k, stride=2, padding=3),
            nn.LeakyReLU(0.2),
        ]
        # stride 2 with padding 1 halves the spatial extent: 16 -> 8 -> 4
        for c_in, c_out in ((width, 2 * width), (2 * width, 4 * width)):
            layers.append(nn.Conv2d(c_in, c_out, k, stride=2, padding=1))
            layers.append(nn.BatchNorm2d(c_out))
            layers.append(nn.LeakyReLU(0.2))
        # the 4x4 kernel on a 4x4 map with no padding leaves a single value
        layers.append(nn.Conv2d(4 * width, 1, k, stride=1, padding=0))
        layers.append(nn.Sigmoid())

        self.main = nn.Sequential(*layers)

    def forward(self, x):
        """Input: (_, 1, 28, 28) image
            Output: (_, 1) classification tensor"""
        images = x.view(-1, 1, 28, 28)
        scores = self.main(images)
        return scores.view(-1, 1)

In [115]:
# Latent size for the demo generator, plus a dummy all-ones latent batch
# holding a single sample.
nz = 10
z = torch.ones((1, nz, 1, 1))

In [116]:
# Dummy batch of three all-ones images shaped like MNIST: (3, 1, 28, 28).
y = torch.ones((3, 1, 28, 28))

In [117]:
# Instantiate both networks (generator first, then discriminator).
G = Generator(latent_size=nz)
D = Discriminator()

In [118]:
# Smoke test: score the dummy image batch `y` with the discriminator;
# Discriminator.forward reshapes its result to (-1, 1), i.e. one score per image.
x = D(y)