# Deep Convolutional GANs

In [47]:
%pip install matplotlib numpy 

Note: you may need to restart the kernel to use updated packages.


In [48]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import numpy as np
import matplotlib.pyplot as plt

In [49]:
# MNIST 

img_rows, img_cols, channels = 28, 28, 1
img_shape = (channels, img_rows, img_cols)

z_dim = 100 # Dimension vom Rauschvektor
batch_size = 128
iterations = 20000



In [50]:
transform = transforms.Compose([
    transforms.ToTensor(), # [0,1]
    transforms.Normalize((0.5,), (0.5,)) # -> [-1, 1] 
])

mnist = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=transform
)

dataloader = DataLoader(
    mnist,
    batch_size=batch_size,
    shuffle=True
)

In [67]:
class Discriminator(nn.Module):
  
    def __init__(self, img_shape):
        super().__init__()

        self.conv_blocks = nn.Sequential(
            # (1, 28, 28) -> (64, 14, 14)
            nn.Conv2d(img_shape[0], 64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True),

            # (64, 14, 14) -> (128, 7, 7)
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True)
        )

        self.flatten = nn.Flatten()
        self.fc = nn.Sequential(
            nn.Linear(128 * 7 * 7, 1),
            nn.Sigmoid()
        )

    def forward(self, img):
        out = self.conv_blocks(img)
        out = self.flatten(out)
        validity = self.fc(out)
        return validity

class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(z_dim, 128*7*7), # vom Rauschvektor zum Feature-Map
            nn.LeakyReLU(0.01)
        )
        self.deconv_blocks = nn.Sequential(
            # (128, 7, 7) -> (64, 14, 14)
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.01),
            # (64, 14, 14) -> (1, 28, 28)
            nn.ConvTranspose2d(64, img_cols, kernel_size=4, stride=2, padding=1),
            nn.Tanh()
        )
    def forward(self, z):
        x = self.fc(z)
        x = x.view(-1, 128, 7, 7) # Reshape in Feature-Map
        return self.deconv_blocks(x)
    

generator = Generator()
discriminator = Discriminator(img_shape)

BCE = nn.BCELoss()
optimizer_G = optim.Adam(generator.parameters(), lr=1e-4)
optimizer_D = optim.Adam(discriminator.parameters(), lr=1e-4)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("mps")

#generator.to(device)
#discriminator.to(device)

In [68]:
def sample_images(generator, z_dim, image_grid_rows=4, image_grid_columns=4):
    generator.eval()
    with torch.no_grad():
        z = torch.randn(image_grid_rows * image_grid_columns, z_dim)
        gen_imgs = generator(z).cpu()

    gen_imgs = (gen_imgs + 1) / 2

    fig, axs = plt.subplots(
        image_grid_rows,
        image_grid_columns,
        figsize=(4, 4),
        sharex=True,
        sharey=True
    )

    cnt = 0
    for i in range(image_grid_rows):
        for j in range(image_grid_columns):
            axs[i, j].imshow(gen_imgs[cnt, 0, :, :], cmap='gray')
            axs[i, j].axis('off')
            cnt += 1
    plt.show()

In [69]:
# Training Loop
def train(iterations):
    iteration = 0
    while iteration < iterations:
        for real_imgs, _ in dataloader: # echte Bilder aus dem Datensatz
            iteration += 1
            batch_size_curr = real_imgs.size(0)

            # Labels für echte und gefälschte Bilder
            real = torch.ones(batch_size_curr, 1)
            fake = torch.zeros(batch_size_curr, 1)
            # L_D(G,D) = BCE(D(x^{real}),1) + BCE(D(G(z)),0)
            # Trainiere Discriminator
            discriminator.train()
            optimizer_D.zero_grad()

            # Part 1 - BCE(D(x^{real}),1)
            real_outputs = discriminator(real_imgs)
            d_loss_real = BCE(real_outputs, real)

            # Part 2 - BCE(D(G(z)),0)
            z = torch.randn(batch_size_curr, z_dim)
            fake_imgs = generator(z)
            fake_outputs = discriminator(fake_imgs.detach()) # detach - keine Gradienten für G
            d_loss_fake = BCE(fake_outputs, fake)

            d_loss = 0.5*(d_loss_real + d_loss_fake) # Gesamter Verlust für D = L_D(G,D) , gemittelt
            d_loss.backward()
            optimizer_D.step()

            # Trainiere Generator
            generator.train()
            optimizer_G.zero_grad()

            # L_G = BCE(D(G(z)),1)
            z = torch.randn(batch_size_curr, z_dim)
            fake_imgs = generator(z)
            fake_outputs = discriminator(fake_imgs) # keine detach - Gradienten für G

            g_loss = BCE(fake_outputs,real) 
            # Generator will, dass D die gefälschten Bilder als echt klassifiziert
            g_loss.backward()
            optimizer_G.step()

            if iteration % 1000 == 0:
                print(f"Iteration {iteration}, D Loss: {d_loss.item()}, G Loss: {g_loss.item()}")
                sample_images(generator, z_dim)
            
            if iteration >= iterations:
                break

In [70]:
train(iterations)

RuntimeError: Given groups=1, weight of size [64, 1, 4, 4], expected input[128, 28, 28, 28] to have 1 channels, but got 28 channels instead

In [72]:
class Generator(nn.Module):
    """
    z (batch, z_dim) -> (batch, 1, 28, 28)
    """
    def __init__(self, z_dim, img_shape):
        super().__init__()
        self.z_dim = z_dim
        self.img_shape = img_shape
        self.init_size = img_shape[1] // 4  # 28 -> 7

        self.fc = nn.Sequential(
            nn.Linear(z_dim, 256 * self.init_size * self.init_size),
            nn.BatchNorm1d(256 * self.init_size * self.init_size),
            nn.ReLU(True)
        )

        self.conv_blocks = nn.Sequential(
            # (256, 7, 7) -> (128, 14, 14)
            nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),

            # (128, 14, 14) -> (1, 28, 28)
            nn.ConvTranspose2d(128, img_shape[0], kernel_size=4, stride=2, padding=1),
            nn.Tanh()
        )

    def forward(self, z):
        out = self.fc(z)
        out = out.view(z.size(0), 256, self.init_size, self.init_size)
        img = self.conv_blocks(out)
        return img


class Discriminator(nn.Module):
  
    def __init__(self, img_shape):
        super().__init__()

        self.conv_blocks = nn.Sequential(
            # (1, 28, 28) -> (64, 14, 14)
            nn.Conv2d(img_shape[0], 64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True),

            # (64, 14, 14) -> (128, 7, 7)
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True)
        )

        self.flatten = nn.Flatten()
        self.fc = nn.Sequential(
            nn.Linear(128 * 7 * 7, 1),
            nn.Sigmoid()
        )

    def forward(self, img):
        out = self.conv_blocks(img)
        out = self.flatten(out)
        validity = self.fc(out)
        return validity

generator = Generator(z_dim, img_shape).to(device)
discriminator = Discriminator(img_shape).to(device)

#generator.apply(weights_init_normal)
#discriminator.apply(weights_init_normal)

BCE = nn.BCELoss()

lr = 1e-4

optimizer_G = optim.Adam(generator.parameters(), lr=lr)
optimizer_D = optim.Adam(discriminator.parameters(), lr=lr)


In [73]:
train(iterations)

RuntimeError: Mismatched Tensor types in NNPack convolutionOutput