# Module 6: Image Generation with Diffusion Models



In [None]:
!pip install -qqq torchview einops matplotlib ipywidgets numpy opencv_python PyYAML torch torchvision tqdm imageio

In [None]:
%matplotlib inline

# 6.1 Images and Convolutions - a recap

# Understanding Transpose Convolution

## Introduction

Transpose convolution, also known as fractionally strided convolution or deconvolution (although this last term is mathematically incorrect), is a crucial operation in many deep learning architectures, particularly in tasks involving upsampling, such as image segmentation and generative models.

![Diagram: Standard Convolution vs Transpose Convolution]


## Basic Concept

While standard convolution typically reduces the spatial dimensions of its input, transpose convolution does the opposite - it increases the spatial dimensions. This makes it particularly useful for tasks that require expanding the spatial resolution of features.

### Key Points:

1. Transpose convolution is not the mathematical inverse of convolution.
2. It's a learnable upsampling technique.
3. It's often used in the decoder part of autoencoder architectures.

## How Transpose Convolution Works

Let's break down the process step-by-step:

1. **Input**: Start with a smaller input feature map.
   
   ![Input Feature Map]
   (Show a small 2x2 grid with values, e.g., [[1, 2], [3, 4]])

2. **Kernel**: Define a learnable kernel (also called filter or weight matrix).
   
   ![Kernel]
   (Show a 2x2 grid with values, e.g., [[1, 2], [3, 4]])

3. **Process**:
   - For each input element, multiply the entire kernel by that element.
   - Place the result in the output, with the top-left corner aligned with the position of the input element.
   - Repeat for all input elements, summing where results overlap.

   ![Transpose Convolution Process]
   (This should be a series of images showing the step-by-step process of multiplying each input element with the kernel and placing the results in the output)

4. **Output**: The result is a larger output feature map.
   
   ![Output Feature Map]
   (Show the final 3x3 grid with the computed values)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Demo inputs.
# input_image: the values 1..9 laid out as a 3x3 ramp, framed by a one-pixel
# border of zeros:
#   [[0, 0, 0, 0, 0],
#    [0, 1, 2, 3, 0],
#    [0, 4, 5, 6, 0],
#    [0, 7, 8, 9, 0],
#    [0, 0, 0, 0, 0]]
input_image = np.pad(np.arange(1, 10).reshape(3, 3), 1, mode='constant')

# kernel: outer product of a vertical [1, 2, 1] weighting and a horizontal
# [1, 0, -1] difference:
#   [[1, 0, -1],
#    [2, 0, -2],
#    [1, 0, -1]]
kernel = np.outer([1, 2, 1], [1, 0, -1])

# Manual convolution function
def convolve2d(image, kernel):
    """Slide ``kernel`` over a zero-padded ``image`` with stride 1 ("same" size).

    The kernel is applied as-is (it is not flipped), which is the convention
    used by deep-learning "convolution" layers.

    Args:
        image: 2-D array.
        kernel: 2-D array of any size (a 3x3 kernel pads one pixel per side).

    Returns:
        Array with the same shape as ``image``. Its dtype is the common dtype
        of ``image`` and ``kernel`` so that a float kernel applied to an
        integer image is not truncated.
    """
    image = np.asarray(image)
    kernel = np.asarray(kernel)
    kh, kw = kernel.shape

    # Distribute the (k - 1) padding pixels over both sides; for odd kernel
    # sizes this is the usual symmetric k // 2 padding.
    top, left = (kh - 1) // 2, (kw - 1) // 2
    padded_image = np.pad(
        image,
        ((top, kh - 1 - top), (left, kw - 1 - left)),
        mode='constant',
    )

    output = np.zeros(image.shape, dtype=np.result_type(image, kernel))
    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            # Element-wise product of the window and the kernel, then sum
            output[i, j] = np.sum(padded_image[i:i+kh, j:j+kw] * kernel)
    return output

# Manual transpose convolution function
def transpose_convolve2d(image, kernel):
    """Stride-1 transpose convolution of a 2-D ``image`` with a 2-D ``kernel``.

    Every input element scales a full copy of the kernel, which is added to
    the output with its top-left corner at that element's position;
    overlapping contributions are summed.

    Args:
        image: 2-D array of shape (H, W).
        kernel: 2-D array of shape (kh, kw).

    Returns:
        Float array of shape (H + kh - 1, W + kw - 1).
    """
    image = np.asarray(image)
    kernel = np.asarray(kernel)
    kh, kw = kernel.shape

    output = np.zeros((image.shape[0] + kh - 1, image.shape[1] + kw - 1))
    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            output[i:i+kh, j:j+kw] += image[i, j] * kernel
    return output

# Perform convolution and transpose convolution
# Both operations receive the same input_image and kernel so their outputs can
# be compared side by side in the plots and shape printout that follow.
conv_output = convolve2d(input_image, kernel)
trans_conv_output = transpose_convolve2d(input_image, kernel)

# Visualization function
def visualize(title, images):
    """Show several 2-D arrays side by side, each with its own colorbar.

    Args:
        title: figure-level title.
        images: mapping of panel title -> 2-D array; panels are drawn left to
            right in the mapping's iteration order.
    """
    # squeeze=False always returns a 2-D array of axes, so a single-panel
    # call works the same way as a multi-panel one.
    fig, axs = plt.subplots(1, len(images), figsize=(20, 5), squeeze=False)
    fig.suptitle(title)

    for ax, (name, img) in zip(axs[0], images.items()):
        im = ax.imshow(img, cmap='viridis')
        ax.set_title(name)
        ax.axis('off')
        fig.colorbar(im, ax=ax)

    fig.tight_layout()
    plt.show()

# Plot the input, the kernel and both results in one row
panels = {
    "Input": input_image,
    "Kernel": kernel,
    "Convolution Output": conv_output,
    "Transpose Convolution Output": trans_conv_output,
}
visualize("Convolution and Transpose Convolution", panels)

# Report the shape of every array involved
for label, array in (
    ("Input", input_image),
    ("Kernel", kernel),
    ("Convolution output", conv_output),
    ("Transpose convolution output", trans_conv_output),
):
    print(f"{label} shape:", array.shape)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# A tiny 2x2 input keeps every intermediate step readable:
#   [[1, 2],
#    [3, 4]]
input_image = np.arange(1, 5).reshape(2, 2)

# The kernel (filter / weight matrix) deliberately uses the same values:
#   [[1, 2],
#    [3, 4]]
kernel = np.arange(1, 5).reshape(2, 2)

def detailed_transpose_convolve2d(image, kernel):
    """Stride-1 transpose convolution that also records every intermediate step.

    Args:
        image: 2-D array of shape (H, W).
        kernel: 2-D array of shape (kh, kw).

    Returns:
        (output, steps) where ``output`` is a float array of shape
        (H + kh - 1, W + kw - 1) and ``steps`` is a list of ``(title, array)``
        pairs: for each input element the scaled kernel and its placement in
        the output canvas, followed by a final ``("Final Output", output)``.
    """
    image = np.asarray(image)
    kernel = np.asarray(kernel)
    kh, kw = kernel.shape

    # The output grows by (kernel size - 1) along each axis
    output = np.zeros((image.shape[0] + kh - 1, image.shape[1] + kw - 1))
    steps = []

    # Iterate over each element in the input image
    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            # Contribution of this single input element on an empty canvas
            step = np.zeros_like(output)
            # Multiply the current input element by the entire kernel
            multiplied_kernel = image[i, j] * kernel
            # Place it with its top-left corner at the element's position
            step[i:i+kh, j:j+kw] = multiplied_kernel
            # Overlapping contributions are summed in the output
            output += step
            # Store intermediate steps for visualization
            steps.append((f"Input[{i},{j}] = {image[i,j]}", multiplied_kernel))
            steps.append((f"Step {i}-{j}", step.copy()))

    # Add the final output to the steps
    steps.append(("Final Output", output))
    return output, steps

# Perform transpose convolution
# `steps` holds the (title, array) pairs recorded along the way; it is what the
# step-by-step visualization below consumes.
trans_conv_output, steps = detailed_transpose_convolve2d(input_image, kernel)

def visualize_steps(title, steps):
    """Draw a row of annotated heatmaps, one per ``(name, array)`` pair.

    Args:
        title: figure-level title.
        steps: sequence of ``(name, 2-D array)`` pairs, drawn left to right.
            Each cell is labelled with its value (one decimal place).
    """
    n_steps = len(steps)
    # squeeze=False keeps `axs` 2-D even when there is a single panel
    fig, axs = plt.subplots(1, n_steps, figsize=(6*n_steps, 6), squeeze=False)
    fig.suptitle(title, fontsize=20)

    for ax, (name, img) in zip(axs[0], steps):
        # Display the array using a colormap
        im = ax.imshow(img, cmap='viridis')
        ax.set_title(name, fontsize=14)
        ax.axis('off')
        # Add a colorbar to show the scale
        fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)

        # Text colour flips around the panel mean (computed once per panel)
        # to stay legible on both ends of the colormap.
        mean_value = np.mean(img)
        for (row, col), value in np.ndenumerate(img):
            ax.text(col, row, f'{value:.1f}', ha='center', va='center',
                    color='white' if value > mean_value else 'black',
                    fontweight='bold', fontsize=12)

    plt.tight_layout()
    plt.show()

# First the raw ingredients, then every step of the transpose convolution
visualize_steps("Input and Kernel", [("Input", input_image), ("Kernel", kernel)])
visualize_steps("Transpose Convolution Steps", steps)

# Shape of each array together with a one-line explanation
shape_notes = (
    ("Input shape:", input_image.shape,
     "Explanation: This is a 2x2 input image."),
    ("\nKernel shape:", kernel.shape,
     "Explanation: This is a 2x2 kernel used for the transpose convolution."),
    ("\nTranspose convolution output shape:", trans_conv_output.shape,
     "Explanation: The output is larger (3x3) due to the nature of transpose convolution."),
)
for heading, shape, note in shape_notes:
    print(heading, shape)
    print(note)

# Walk-through of the algorithm, one numbered line per stage
print("\nDetailed Explanation of Transpose Convolution:")
print("\n".join([
    "1. We start with a 2x2 input and a 2x2 kernel.",
    "2. For each input pixel, we multiply the entire kernel by that pixel's value.",
    "3. We then place this multiplied kernel in the output, with its top-left corner aligned with the position of the input pixel.",
    "4. We do this for all input pixels, summing the results where they overlap.",
    "5. This process naturally results in an output that is larger than the input (3x3 in this case).",
    "6. The expansion of the input and the overlapping summations allow the transpose convolution to 'learn' how to upsample or increase the spatial dimensions of the input.",
]))

## Generative Adversarial Networks

In [None]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader

# Training configuration
batch_size, num_epochs = 128, 10  # samples per step, passes over the data
lr = 5e-4                         # learning rate
nz = 100                          # size of the latent z vector

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# MNIST: convert to tensors, then normalise with mean 0.5 and std 0.5
normalize = transforms.Normalize(mean=(0.5,), std=(0.5,))
transform = transforms.Compose([transforms.ToTensor(), normalize])

train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

#### A simple GAN

In [None]:
# !rm -rf ./gan_output

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import imageio
import os

# Training configuration
batch_size, num_epochs = 128, 10  # samples per step, passes over the data
lr = 2e-4                         # learning rate
nz = 100                          # size of the latent z vector
ngf = ndf = 64                    # generator / discriminator filter counts

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# MNIST: convert to tensors, then normalise with mean 0.5 and std 0.5
normalize = transforms.Normalize(mean=(0.5,), std=(0.5,))
transform = transforms.Compose([transforms.ToTensor(), normalize])

train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Generator network
class Generator(nn.Module):
    """Fully connected generator: latent vector of size ``nz`` -> 1x28x28 image.

    The network is nz -> 256 -> 512 -> 1024 -> 784 with ReLU between layers
    and a final Tanh, so outputs lie in [-1, 1].
    """

    def __init__(self):
        super().__init__()
        widths = [nz, 256, 512, 1024]
        layers = []
        # Hidden stack: Linear + in-place ReLU for each consecutive width pair
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers += [nn.Linear(fan_in, fan_out), nn.ReLU(True)]
        # Output layer: one value per pixel, squashed by Tanh
        layers += [nn.Linear(widths[-1], 28*28), nn.Tanh()]
        self.main = nn.Sequential(*layers)

    def forward(self, x):
        """Map latent vectors to a batch of images shaped (-1, 1, 28, 28)."""
        flat = self.main(x)
        return flat.view(-1, 1, 28, 28)

# Discriminator network
class Discriminator(nn.Module):
    """Fully connected discriminator: 28x28 image -> one sigmoid score.

    The network is 784 -> 1024 -> 512 -> 256 -> 1 with LeakyReLU(0.2) between
    layers and a final Sigmoid.
    """

    def __init__(self):
        super().__init__()
        widths = [28*28, 1024, 512, 256]
        layers = []
        # Hidden stack: Linear + in-place LeakyReLU for each width pair
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers += [nn.Linear(fan_in, fan_out), nn.LeakyReLU(0.2, inplace=True)]
        # Single output unit squashed by Sigmoid
        layers += [nn.Linear(widths[-1], 1), nn.Sigmoid()]
        self.main = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten each image to 784 values and return scores shaped (-1, 1)."""
        flat = x.view(-1, 28*28)
        return self.main(flat)

# Instantiate both networks on the selected device
G, D = Generator().to(device), Discriminator().to(device)

# Binary cross-entropy on the discriminator's sigmoid output
criterion = nn.BCELoss()

# Both networks use Adam with the same learning rate and betas
adam_settings = dict(lr=lr, betas=(0.5, 0.999))
optimizerD = optim.Adam(D.parameters(), **adam_settings)
optimizerG = optim.Adam(G.parameters(), **adam_settings)

# Function to generate and save images
def save_generator_output(G, fixed_noise, epoch, output_dir):
    """Render a 4x4 grid of generator samples and save it as a PNG.

    Args:
        G: generator module; ``G(fixed_noise)`` must be viewable as
            ``(-1, 28, 28)``.
        fixed_noise: latent batch fed to ``G``. At most 16 samples are drawn;
            with fewer, the remaining panels stay empty.
        epoch: used in the figure title and in the file name.
        output_dir: existing directory that receives ``epoch_{epoch}.png``.

    The generator's train/eval mode is restored to what it was on entry.
    """
    was_training = G.training
    G.eval()
    # Sampling for a plot needs no gradients, so skip building the graph
    with torch.no_grad():
        fake_images = G(fixed_noise).view(-1, 28, 28).cpu().numpy()

    fig, axes = plt.subplots(4, 4, figsize=(5, 5))
    for ax in axes.flat:
        ax.axis('off')
    # zip stops at the shorter sequence, so a small batch cannot overrun
    for ax, image in zip(axes.flat, fake_images):
        ax.imshow(image, cmap='gray')

    fig.suptitle(f"Epoch {epoch}")
    fig.savefig(f"{output_dir}/epoch_{epoch}.png")
    plt.close(fig)
    G.train(was_training)

# Create output directory
output_dir = "gan_output"
os.makedirs(output_dir, exist_ok=True)

# Fixed noise for visualization: the same latent vectors are rendered after
# every epoch so the saved grids are directly comparable.
fixed_noise = torch.randn(16, nz, device=device)

# Training loop
for epoch in range(num_epochs):
    for i, (imgs, _) in enumerate(train_loader):
        real_imgs = imgs.to(device)
        b_size = real_imgs.size(0)
        label_real = torch.full((b_size,), 1., device=device)
        label_fake = torch.full((b_size,), 0., device=device)

        # ---- Update discriminator: maximize log(D(x)) + log(1 - D(G(z))) ----
        # Clear D's gradients first: the generator step below back-propagates
        # through D and leaves gradients on D's parameters, which must not
        # leak into this discriminator update.
        optimizerD.zero_grad()

        output = D(real_imgs).view(-1)
        lossD_real = criterion(output, label_real)
        lossD_real.backward()

        noise = torch.randn(b_size, nz, device=device)
        fake_imgs = G(noise)
        # detach(): the discriminator update must not touch the generator
        output = D(fake_imgs.detach()).view(-1)
        lossD_fake = criterion(output, label_fake)
        lossD_fake.backward()

        optimizerD.step()

        # ---- Update generator: maximize log(D(G(z))) ----
        optimizerG.zero_grad()
        output = D(fake_imgs).view(-1)
        # Fakes are scored against the "real" label to fool the discriminator
        lossG = criterion(output, label_real)
        lossG.backward()
        optimizerG.step()

        if i % 100 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}] Batch {i}/{len(train_loader)} "
                  f"Loss D: {lossD_real + lossD_fake:.4f}, loss G: {lossG:.4f}")

    # Save generated images once per epoch
    save_generator_output(G, fixed_noise, epoch, output_dir)

print("Training finished.")

# Create GIF from the per-epoch snapshots, in epoch order
images = [imageio.imread(f"{output_dir}/epoch_{epoch}.png") for epoch in range(num_epochs)]
imageio.mimsave(f"{output_dir}/gan_training.gif", images, duration=0.5)

In [None]:
from IPython.display import HTML, display
import base64
import glob
import re
import uuid


def _epoch_number(path):
    """Return the integer N from a '.../epoch_N.png' path (-1 if absent)."""
    match = re.search(r"epoch_(\d+)\.png$", path)
    return int(match.group(1)) if match else -1


# Get list of image files, ordered by epoch number rather than by string
# (a plain sort would put epoch_10 before epoch_2)
image_files = sorted(glob.glob('gan_output/epoch_*.png'), key=_epoch_number)

# Function to encode images to base64
def image_to_base64(image_path):
    """Read a file and return its contents as a base64-encoded str."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

# Create HTML with all images (hidden until the script reveals them)
images_base64 = [image_to_base64(image) for image in image_files]
images_html = ''.join([f'<img src="data:image/png;base64,{img}" style="display:none;">' for img in images_base64])

# Unique id so the script only touches the frames of this output and not
# every <img> on the page (e.g. plots rendered by other cells)
container_id = f"gan-animation-{uuid.uuid4().hex}"

# JavaScript to handle animation
js_animation = """
<script>
(function() {
    var images = document.querySelectorAll("#CONTAINER_ID img");
    if (images.length === 0) { return; }  // nothing to animate
    var index = 0;
    images[index].style.display = "block";
    setInterval(function() {
        images[index].style.display = "none";
        index = (index + 1) % images.length;
        images[index].style.display = "block";
    }, 500);  // Change image every 500ms
})();
</script>
""".replace("CONTAINER_ID", container_id)

# Combine HTML and JavaScript
animation_html = f"""
<div id="{container_id}" style="width:250px; margin:auto;">
    {images_html}
    {js_animation}
</div>
"""

# Display the animation
display(HTML(animation_html))

#### Generating Samples with the simple GAN

In [None]:
# Inference: Generate new images
G.eval()  # Set the generator to evaluation mode

# Generate a batch of new images
with torch.no_grad():
    noise = torch.randn(16, nz, device=device)  # Generate 16 random noise vectors
    fake_imgs = G(noise).cpu()

# Function to show images
def show_images(images, nrow=4):
    """Plot single-channel images in an ``nrow`` x ``nrow`` grid.

    Args:
        images: tensor of shape (N, C, H, W) with values in [-1, 1]; only the
            first channel is shown. If N < nrow * nrow the remaining panels
            stay empty; extra images are ignored.
        nrow: number of rows and columns of the grid.
    """
    images = (images + 1) / 2  # Rescale from [-1, 1] to [0, 1]
    grid = np.transpose(images.numpy(), (0, 2, 3, 1))  # NCHW -> NHWC
    # squeeze=False keeps `axes` 2-D even for nrow=1
    fig, axes = plt.subplots(nrow, nrow, figsize=(10, 10), squeeze=False)
    for ax in axes.flat:
        ax.axis('off')
    for ax, img in zip(axes.flat, grid):
        ax.imshow(img[:, :, 0], cmap='gray')
    plt.show()

# Display the generated images
show_images(fake_imgs)

### Improving our GAN

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import imageio
import os

# Training configuration
batch_size = 128  # samples per optimisation step
num_epochs = 10   # passes over the training set
lr = 2e-4         # base learning rate
nz = 100          # size of the latent z vector
ngf, ndf = 64, 64 # generator / discriminator filter counts

# Pick the GPU if PyTorch can see one, else the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# MNIST: convert to tensors, then normalise with mean 0.5 and std 0.5
preprocessing = [transforms.ToTensor(), transforms.Normalize(mean=(0.5,), std=(0.5,))]
transform = transforms.Compose(preprocessing)

train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Generator network
class Generator(nn.Module):
    """MLP generator turning a latent vector of size ``nz`` into a 1x28x28 image.

    Layer widths are nz -> 256 -> 512 -> 1024 -> 784, with ReLU after each
    hidden layer and Tanh on the output (values in [-1, 1]).
    """

    def __init__(self):
        super().__init__()
        hidden = (256, 512, 1024)
        modules = []
        in_features = nz
        # One Linear + in-place ReLU per hidden width
        for out_features in hidden:
            modules.append(nn.Linear(in_features, out_features))
            modules.append(nn.ReLU(True))
            in_features = out_features
        # Pixel layer followed by Tanh
        modules.append(nn.Linear(in_features, 28*28))
        modules.append(nn.Tanh())
        self.main = nn.Sequential(*modules)

    def forward(self, x):
        """Return generated images shaped (-1, 1, 28, 28)."""
        pixels = self.main(x)
        return pixels.view(-1, 1, 28, 28)

# Discriminator network
class Discriminator(nn.Module):
    """MLP discriminator scoring a 28x28 image with a single sigmoid output.

    Layer widths are 784 -> 1024 -> 512 -> 256 -> 1, with LeakyReLU(0.2)
    after each hidden layer and Sigmoid on the output.
    """

    def __init__(self):
        super().__init__()
        hidden = (1024, 512, 256)
        modules = []
        in_features = 28*28
        # One Linear + in-place LeakyReLU per hidden width
        for out_features in hidden:
            modules.append(nn.Linear(in_features, out_features))
            modules.append(nn.LeakyReLU(0.2, inplace=True))
            in_features = out_features
        # Single score squashed by Sigmoid
        modules.append(nn.Linear(in_features, 1))
        modules.append(nn.Sigmoid())
        self.main = nn.Sequential(*modules)

    def forward(self, x):
        """Flatten the input to (-1, 784) and return scores shaped (-1, 1)."""
        return self.main(x.view(-1, 28*28))

# Instantiate both networks on the selected device
G, D = Generator().to(device), Discriminator().to(device)

# Binary cross-entropy on the discriminator's sigmoid output
criterion = nn.BCELoss()

# Adam for both networks; the generator runs at twice the base learning rate
adam_betas = (0.5, 0.999)
optimizerD = optim.Adam(D.parameters(), lr=lr, betas=adam_betas)
optimizerG = optim.Adam(G.parameters(), lr=2*lr, betas=adam_betas)

# Function to generate and save images
def save_generator_output(G, fixed_noise, epoch, output_dir):
    """Render a 4x4 grid of generator samples and save it as a PNG.

    Args:
        G: generator module; ``G(fixed_noise)`` must be viewable as
            ``(-1, 28, 28)``.
        fixed_noise: latent batch fed to ``G``. At most 16 samples are drawn;
            with fewer, the remaining panels stay empty.
        epoch: used in the figure title and in the file name.
        output_dir: existing directory that receives ``epoch_{epoch}.png``.

    The generator's train/eval mode is restored to what it was on entry.
    """
    was_training = G.training
    G.eval()
    # No gradients are needed to draw samples, so avoid building the graph
    with torch.no_grad():
        fake_images = G(fixed_noise).view(-1, 28, 28).cpu().numpy()

    fig, axes = plt.subplots(4, 4, figsize=(5, 5))
    for ax in axes.flat:
        ax.axis('off')
    # zip stops at the shorter sequence, so a small batch cannot overrun
    for ax, image in zip(axes.flat, fake_images):
        ax.imshow(image, cmap='gray')

    fig.suptitle(f"Epoch {epoch}")
    fig.savefig(f"{output_dir}/epoch_{epoch}.png")
    plt.close(fig)
    G.train(was_training)

# Create output directory
output_dir = "improved_gan_output"
os.makedirs(output_dir, exist_ok=True)

# Fixed noise for visualization: the same latent vectors are rendered after
# every epoch so the saved grids are directly comparable.
fixed_noise = torch.randn(16, nz, device=device)

# Training loop
for epoch in range(num_epochs):
    for i, (imgs, _) in enumerate(train_loader):
        real_imgs = imgs.to(device)
        b_size = real_imgs.size(0)
        # Label smoothing: the "real" target is 0.9 instead of 1.0
        label_real = torch.full((b_size,), 0.9, device=device)
        label_fake = torch.full((b_size,), 0., device=device)

        # ---- Update discriminator: maximize log(D(x)) + log(1 - D(G(z))) ----
        # Clear D's gradients first: the generator step below back-propagates
        # through D and leaves gradients on D's parameters, which must not
        # leak into this discriminator update.
        optimizerD.zero_grad()

        output = D(real_imgs).view(-1)
        lossD_real = criterion(output, label_real)
        lossD_real.backward()

        noise = torch.randn(b_size, nz, device=device)
        fake_imgs = G(noise)
        # detach(): the discriminator update must not touch the generator
        output = D(fake_imgs.detach()).view(-1)
        lossD_fake = criterion(output, label_fake)
        lossD_fake.backward()

        optimizerD.step()

        # ---- Update generator: maximize log(D(G(z))) ----
        optimizerG.zero_grad()
        output = D(fake_imgs).view(-1)
        # Fakes are scored against the (smoothed) "real" label
        lossG = criterion(output, label_real)
        lossG.backward()
        optimizerG.step()

        if i % 100 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}] Batch {i}/{len(train_loader)} "
                  f"Loss D: {lossD_real + lossD_fake:.4f}, loss G: {lossG:.4f}")

    # Save generated images once per epoch
    save_generator_output(G, fixed_noise, epoch, output_dir)

print("Training finished.")

# Create GIF from the per-epoch snapshots, in epoch order
images = [imageio.imread(f"{output_dir}/epoch_{epoch}.png") for epoch in range(num_epochs)]
imageio.mimsave(f"{output_dir}/gan_training.gif", images, duration=0.5)

In [None]:
from IPython.display import HTML, display
import base64
import glob
import re
import uuid


def _epoch_number(path):
    """Return the integer N from a '.../epoch_N.png' path (-1 if absent)."""
    match = re.search(r"epoch_(\d+)\.png$", path)
    return int(match.group(1)) if match else -1


# Get list of image files, ordered by epoch number rather than by string
# (a plain sort would put epoch_10 before epoch_2)
image_files = sorted(glob.glob('improved_gan_output/epoch_*.png'), key=_epoch_number)

# Function to encode images to base64
def image_to_base64(image_path):
    """Read a file and return its contents as a base64-encoded str."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

# Create HTML with all images (hidden until the script reveals them)
images_base64 = [image_to_base64(image) for image in image_files]
images_html = ''.join([f'<img src="data:image/png;base64,{img}" style="display:none;">' for img in images_base64])

# Unique id so the script only touches the frames of this output and not
# every <img> on the page (e.g. plots rendered by other cells)
container_id = f"improved-gan-animation-{uuid.uuid4().hex}"

# JavaScript to handle animation
js_animation = """
<script>
(function() {
    var images = document.querySelectorAll("#CONTAINER_ID img");
    if (images.length === 0) { return; }  // nothing to animate
    var index = 0;
    images[index].style.display = "block";
    setInterval(function() {
        images[index].style.display = "none";
        index = (index + 1) % images.length;
        images[index].style.display = "block";
    }, 500);  // Change image every 500ms
})();
</script>
""".replace("CONTAINER_ID", container_id)

# Combine HTML and JavaScript
animation_html = f"""
<div id="{container_id}" style="width:250px; margin:auto;">
    {images_html}
    {js_animation}
</div>
"""

# Display the animation
display(HTML(animation_html))

In [None]:
# Inference: Generate new images
G.eval()  # Set the generator to evaluation mode

# Generate a batch of new images
with torch.no_grad():
    noise = torch.randn(16, nz, device=device)  # Generate 16 random noise vectors
    fake_imgs = G(noise).cpu()

# Function to show images
def show_images(images, nrow=4):
    """Plot single-channel images in an ``nrow`` x ``nrow`` grid.

    Args:
        images: tensor of shape (N, C, H, W) with values in [-1, 1]; only the
            first channel is shown. If N < nrow * nrow the remaining panels
            stay empty; extra images are ignored.
        nrow: number of rows and columns of the grid.
    """
    images = (images + 1) / 2  # Rescale from [-1, 1] to [0, 1]
    grid = np.transpose(images.numpy(), (0, 2, 3, 1))  # NCHW -> NHWC
    # squeeze=False keeps `axes` 2-D even for nrow=1
    fig, axes = plt.subplots(nrow, nrow, figsize=(10, 10), squeeze=False)
    for ax in axes.flat:
        ax.axis('off')
    for ax, img in zip(axes.flat, grid):
        ax.imshow(img[:, :, 0], cmap='gray')
    plt.show()

# Display the generated images
show_images(fake_imgs)

### Deep Convolutional GAN (DCGAN)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np

# `os` is needed below for makedirs; import it here so this cell does not
# depend on an earlier cell having imported it.
import os

# Hyperparameters
batch_size = 1024
lr = 0.0002
nz = 100  # size of the latent z vector
num_epochs = 25
ngf = 64  # number of generator filters
ndf = 64  # number of discriminator filters

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# CIFAR-10 dataset: convert to tensors, then normalise each of the three
# channels with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_dataset = datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Create output directory
output_dir = "dcgan_output"
os.makedirs(output_dir, exist_ok=True)

# Fixed noise for visualization; sized by nz so it always matches the
# generator's latent dimension
fixed_noise = torch.randn(16, nz, device=device)


def save_generator_output(G, fixed_noise, epoch, output_dir):
    """Save a 4x4 grid of generator samples as ``{output_dir}/epoch_{epoch}.png``.

    ``fixed_noise`` is reshaped to (batch, -1, 1, 1) before being passed to
    ``G``. The generator is switched to eval mode for sampling and put back
    into train mode before returning.
    """
    G.eval()
    # Latent vectors as 4-D input: (batch_size, channels, 1, 1)
    latent = fixed_noise.view(fixed_noise.size(0), -1, 1, 1)
    with torch.no_grad():
        samples = G(latent)

    # Rescale from [-1, 1] to [0, 1], clamp, then reorder NCHW -> NHWC
    samples = ((samples.detach().cpu() + 1) / 2.0).clamp(0, 1)
    samples = samples.numpy().transpose(0, 2, 3, 1)

    fig, axes = plt.subplots(4, 4, figsize=(4, 4))
    for idx, ax in enumerate(axes.flat):
        ax.imshow(samples[idx])
        ax.axis('off')

    plt.suptitle(f"Epoch {epoch}")
    plt.savefig(f"{output_dir}/epoch_{epoch}.png", dpi=300, bbox_inches='tight')
    plt.close()
    G.train()

# Generator network (DCGAN)
class Generator(nn.Module):
    """DCGAN generator: (nz, 1, 1) latent -> (3, 32, 32) image in [-1, 1].

    Spatial size grows 1 -> 4 -> 8 -> 16 -> 32 through four transpose
    convolutions while the channel count goes nz -> ngf*8 -> ngf*4 -> ngf*2 -> 3.
    """

    def __init__(self):
        super().__init__()
        # (in_channels, out_channels, stride, padding) of the upsampling
        # stages that are followed by BatchNorm + ReLU; kernel size is 4.
        stages = [
            (nz, ngf * 8, 1, 0),       # -> (ngf*8) x 4 x 4
            (ngf * 8, ngf * 4, 2, 1),  # -> (ngf*4) x 8 x 8
            (ngf * 4, ngf * 2, 2, 1),  # -> (ngf*2) x 16 x 16
        ]
        layers = []
        for c_in, c_out, stride, padding in stages:
            layers += [
                nn.ConvTranspose2d(c_in, c_out, 4, stride, padding, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(True),
            ]
        # Final stage -> 3 x 32 x 32, squashed by Tanh
        layers += [nn.ConvTranspose2d(ngf * 2, 3, 4, 2, 1, bias=False), nn.Tanh()]
        self.main = nn.Sequential(*layers)

    def forward(self, x):
        """Run the latent batch through the transpose-convolution stack."""
        return self.main(x)

# Discriminator network (DCGAN)
class Discriminator(nn.Module):
    """DCGAN discriminator: (3, 32, 32) image -> one sigmoid score per sample.

    Spatial size shrinks 32 -> 16 -> 8 -> 4 -> 1 through four convolutions
    while the channel count goes 3 -> ndf -> ndf*2 -> ndf*4 -> 1.
    """

    def __init__(self):
        super().__init__()
        leaky = lambda: nn.LeakyReLU(0.2, inplace=True)
        layers = [
            # input is (3, 32, 32); no BatchNorm on the first layer
            nn.Conv2d(3, ndf, 4, 2, 1, bias=False),
            leaky(),
        ]
        # (ndf) x 16 x 16 -> (ndf*2) x 8 x 8 -> (ndf*4) x 4 x 4
        for c_in, c_out in ((ndf, ndf * 2), (ndf * 2, ndf * 4)):
            layers += [
                nn.Conv2d(c_in, c_out, 4, 2, 1, bias=False),
                nn.BatchNorm2d(c_out),
                leaky(),
            ]
        # 4x4 feature map -> single score
        layers += [nn.Conv2d(ndf * 4, 1, 4, 1, 0, bias=False), nn.Sigmoid()]
        self.main = nn.Sequential(*layers)

    def forward(self, x):
        """Return the scores flattened to a 1-D tensor."""
        scores = self.main(x)
        return scores.view(-1)

# Instantiate both networks on the selected device
G, D = Generator().to(device), Discriminator().to(device)

# Binary cross-entropy on the discriminator's sigmoid output
criterion = nn.BCELoss()

# Identical Adam settings for discriminator and generator
adam_settings = dict(lr=lr, betas=(0.5, 0.999))
optimizerD = optim.Adam(D.parameters(), **adam_settings)
optimizerG = optim.Adam(G.parameters(), **adam_settings)

# Training loop
for epoch in range(num_epochs):
    for i, (imgs, _) in enumerate(train_loader):
        real_imgs = imgs.to(device)
        b_size = real_imgs.size(0)
        label_real = torch.full((b_size,), 1.0, device=device)  # hard "real" target for D (no smoothing)
        label_fake = torch.full((b_size,), 0.0, device=device)
        # Target the generator pushes D(G(z)) towards; kept in its own tensor
        # instead of overwriting label_fake in place.
        label_gen = torch.full((b_size,), 0.9, device=device)

        # ---- Update discriminator: maximize log(D(x)) + log(1 - D(G(z))) ----
        # Train with real images
        D.zero_grad()
        output = D(real_imgs).view(-1)
        lossD_real = criterion(output, label_real)
        lossD_real.backward()

        # Train with fake images; detach() keeps this update out of G
        noise = torch.randn(b_size, nz, 1, 1, device=device)
        fake_imgs = G(noise)
        output = D(fake_imgs.detach()).view(-1)
        lossD_fake = criterion(output, label_fake)
        lossD_fake.backward()
        optimizerD.step()

        # ---- Update generator: maximize log(D(G(z))) ----
        G.zero_grad()
        output = D(fake_imgs).view(-1)
        lossG = criterion(output, label_gen)
        lossG.backward()
        optimizerG.step()

        if i % 100 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}] Batch {i}/{len(train_loader)} "
                  f"Loss D: {lossD_real + lossD_fake:.4f}, loss G: {lossG:.4f}")

    # Save generated images once per epoch. The file name only depends on the
    # epoch, so saving after every batch would just rewrite the same PNG.
    save_generator_output(G, fixed_noise, epoch, output_dir)

print("Training finished.")

In [None]:
from IPython.display import HTML, display
import base64
import glob
import re
import uuid


def _epoch_number(path):
    """Return the integer N from a '.../epoch_N.png' path (-1 if absent)."""
    match = re.search(r"epoch_(\d+)\.png$", path)
    return int(match.group(1)) if match else -1


# Get list of image files, ordered by epoch number rather than by string
# (a plain sort would play epoch_10 before epoch_2)
image_files = sorted(glob.glob('dcgan_output/epoch_*.png'), key=_epoch_number)

# Function to encode images to base64
def image_to_base64(image_path):
    """Read a file and return its contents as a base64-encoded str."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

# Create HTML with all images (hidden until the script reveals them)
images_base64 = [image_to_base64(image) for image in image_files]
images_html = ''.join([f'<img src="data:image/png;base64,{img}" style="display:none;">' for img in images_base64])

# Unique id so the script only touches the frames of this output and not
# every <img> on the page (e.g. plots rendered by other cells)
container_id = f"dcgan-animation-{uuid.uuid4().hex}"

# JavaScript to handle animation
js_animation = """
<script>
(function() {
    var images = document.querySelectorAll("#CONTAINER_ID img");
    if (images.length === 0) { return; }  // nothing to animate
    var index = 0;
    images[index].style.display = "block";
    setInterval(function() {
        images[index].style.display = "none";
        index = (index + 1) % images.length;
        images[index].style.display = "block";
    }, 500);  // Change image every 500ms
})();
</script>
""".replace("CONTAINER_ID", container_id)

# Combine HTML and JavaScript
animation_html = f"""
<div id="{container_id}" style="width:250px; margin:auto;">
    {images_html}
    {js_animation}
</div>
"""

# Display the animation
display(HTML(animation_html))

### NOTE: LAB SOLUTION

Improve the DCGAN:

### Improving our DCGAN Generator



```
label_real = torch.full((b_size,), 0.9, device=device)
batch_size = 128
lr = 0.0005
optimizerG = optim.Adam(G.parameters(), lr=1.5*lr, betas=(0.5, 0.999))
```

In [None]:
# Inference: Generate new images
G.eval()  # Set the generator to evaluation mode

# Generate a batch of new images
with torch.no_grad():
    noise = torch.randn(16, nz, 1, 1, device=device)  # Generate 16 random noise vectors
    fake_imgs = G(noise).cpu()

# Function to show images
def show_images(images, nrow=4):
    """Plot colour images in an ``nrow`` x ``nrow`` grid.

    Args:
        images: tensor of shape (N, C, H, W) with values in [-1, 1]. If
            N < nrow * nrow the remaining panels stay empty; extra images
            are ignored.
        nrow: number of rows and columns of the grid.
    """
    images = (images + 1) / 2  # Rescale from [-1, 1] to [0, 1]
    grid = np.transpose(images.numpy(), (0, 2, 3, 1))  # NCHW -> NHWC
    # squeeze=False keeps `axes` 2-D even for nrow=1
    fig, axes = plt.subplots(nrow, nrow, figsize=(10, 10), squeeze=False)
    for ax in axes.flat:
        ax.axis('off')
    for ax, img in zip(axes.flat, grid):
        ax.imshow(img)
    plt.show()

# Display the generated images
show_images(fake_imgs)