# 🎨 Generative Models: GANs, VAE & StyleGAN

Image generation with state-of-the-art generative models.

## Learning Outcomes
- GAN fundamentals and training
- VAE for latent space exploration
- StyleGAN for high-quality generation
- CycleGAN for image translation

**Level**: Advanced | **Time**: 90 min | **GPU**: Required

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Device: {device}")

## 1. Basic GAN Architecture

In [None]:
class Generator(nn.Module):
    """Fully connected generator that turns latent vectors into images.

    The network stacks three ``Linear -> LeakyReLU(0.2) -> BatchNorm1d``
    stages (256, 512 and 1024 units), then projects to ``prod(img_shape)``
    values and applies ``Tanh``, so every output value lies in [-1, 1].

    Args:
        latent_dim: Length of each input noise vector.
        img_shape: Shape of a single generated image (without the batch axis).
    """

    def __init__(self, latent_dim=100, img_shape=(1, 28, 28)):
        super().__init__()
        self.img_shape = img_shape

        # Layers are created in the same order they run, one stage per width.
        widths = (latent_dim, 256, 512, 1024)
        stages = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            stages += [
                nn.Linear(n_in, n_out),
                nn.LeakyReLU(0.2),
                nn.BatchNorm1d(n_out),
            ]

        n_pixels = int(np.prod(img_shape))
        self.model = nn.Sequential(
            *stages,
            nn.Linear(widths[-1], n_pixels),
            nn.Tanh(),
        )

    def forward(self, z):
        """Return images shaped ``(batch, *img_shape)`` for the noise batch ``z``."""
        flat = self.model(z)
        return flat.view(flat.size(0), *self.img_shape)

class Discriminator(nn.Module):
    """Fully connected discriminator scoring images with a value in (0, 1).

    Images are flattened and passed through two
    ``Linear -> LeakyReLU(0.2) -> Dropout(0.3)`` stages (512 and 256 units),
    then a single-unit ``Linear`` followed by ``Sigmoid``.

    Args:
        img_shape: Shape of a single input image (without the batch axis).
    """

    def __init__(self, img_shape=(1, 28, 28)):
        super().__init__()
        n_pixels = int(np.prod(img_shape))

        hidden = []
        for n_in, n_out in ((n_pixels, 512), (512, 256)):
            hidden.extend(
                (nn.Linear(n_in, n_out), nn.LeakyReLU(0.2), nn.Dropout(0.3))
            )
        self.model = nn.Sequential(*hidden, nn.Linear(256, 1), nn.Sigmoid())

    def forward(self, img):
        """Return a ``(batch, 1)`` tensor of scores for the image batch ``img``."""
        batch = img.size(0)
        flat = img.view(batch, -1)
        return self.model(flat)

# Build both networks (generator first), then move them to the chosen device.
# nn.Module.to() moves the parameters in place, so the names stay bound to the
# same module objects.
latent_dim = 100
generator = Generator(latent_dim)
generator.to(device)
discriminator = Discriminator()
discriminator.to(device)
print(f"Generator params: {sum(map(torch.numel, generator.parameters())):,}")

## 2. GAN Training

In [None]:
# Load MNIST. ToTensor yields values in [0, 1]; Normalize([0.5], [0.5]) rescales
# them to [-1, 1], the same range as the generator's Tanh output.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize([0.5], [0.5])])
mnist = datasets.MNIST('./data', train=True, download=True, transform=transform)
dataloader = DataLoader(mnist, batch_size=64, shuffle=True)

# Optimizers
g_optimizer = optim.Adam(generator.parameters(), lr=0.0002, betas=(0.5, 0.999))
d_optimizer = optim.Adam(discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))
criterion = nn.BCELoss()

# Training
print("Training GAN...")
for epoch in range(5):
    for i, (real_imgs, _) in enumerate(dataloader):
        if i > 100:
            break  # Quick demo: only the first 101 batches of each epoch

        real_imgs = real_imgs.to(device)
        batch_size = real_imgs.size(0)

        # Labels are created directly on the target device, which avoids a
        # CPU allocation plus a host-to-device copy on every step.
        real_labels = torch.ones(batch_size, 1, device=device)
        fake_labels = torch.zeros(batch_size, 1, device=device)

        # Train Discriminator: real images should score 1, generated ones 0.
        d_optimizer.zero_grad()
        d_real = discriminator(real_imgs)
        d_real_loss = criterion(d_real, real_labels)

        z = torch.randn(batch_size, latent_dim, device=device)
        fake_imgs = generator(z)
        # detach() keeps the discriminator loss from back-propagating into
        # the generator.
        d_fake = discriminator(fake_imgs.detach())
        d_fake_loss = criterion(d_fake, fake_labels)

        d_loss = d_real_loss + d_fake_loss
        d_loss.backward()
        d_optimizer.step()

        # Train Generator: reuse the same fakes (not detached this time) and
        # score them against the "real" labels.
        g_optimizer.zero_grad()
        g_loss = criterion(discriminator(fake_imgs), real_labels)
        g_loss.backward()
        g_optimizer.step()

    # Reports the losses of the last processed batch, not an epoch average.
    print(f"Epoch {epoch+1}: D_loss={d_loss.item():.4f}, G_loss={g_loss.item():.4f}")

## 3. Variational Autoencoder (VAE)

In [None]:
class VAE(nn.Module):
    """Variational autoencoder with one hidden layer in encoder and decoder.

    The encoder maps a flattened input to a hidden representation, from which
    two linear heads produce the latent mean (``fc_mu``) and the latent
    log-variance (``fc_var``). The decoder maps a latent sample back to
    ``input_dim`` values squashed into (0, 1) by a ``Sigmoid``.

    Args:
        latent_dim: Size of the latent space.
        input_dim: Number of values per flattened input (784 = 28 * 28).
        hidden_dim: Width of the hidden layer in both encoder and decoder.
    """

    def __init__(self, latent_dim=20, input_dim=784, hidden_dim=400):
        super().__init__()
        self.latent_dim = latent_dim
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim

        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU()
        )
        self.fc_mu = nn.Linear(hidden_dim, latent_dim)
        # Despite its name this head is consumed as log-variance
        # (see ``reparameterize``).
        self.fc_var = nn.Linear(hidden_dim, latent_dim)

        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
            nn.Sigmoid()
        )

    def encode(self, x):
        """Return ``(mu, logvar)`` for input ``x``, flattened to ``(-1, input_dim)``."""
        # reshape (rather than view) also accepts non-contiguous inputs.
        h = self.encoder(x.reshape(-1, self.input_dim))
        return self.fc_mu(h), self.fc_var(h)

    def reparameterize(self, mu, logvar):
        """Sample ``z = mu + eps * std`` with ``eps ~ N(0, I)``.

        Writing the sample this way keeps it differentiable with respect to
        ``mu`` and ``logvar``.
        """
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        """Map latent vectors ``z`` to reconstructions of shape ``(batch, input_dim)``."""
        return self.decoder(z)

    def forward(self, x):
        """Return ``(reconstruction, mu, logvar)`` for input ``x``."""
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar

def vae_loss(recon_x, x, mu, logvar):
    """Return the summed VAE loss: reconstruction BCE plus KL divergence.

    Args:
        recon_x: Reconstructions with values in [0, 1], shape ``(batch, features)``.
        x: Target inputs with values in [0, 1]; flattened to ``recon_x``'s
            feature width before comparison.
        mu: Latent means.
        logvar: Latent log-variances.

    Returns:
        Scalar tensor ``BCE + KLD``, summed (not averaged) over the batch.
    """
    # Flatten the target to the reconstruction's feature width instead of a
    # hard-coded 784, so inputs of other sizes work too.
    target = x.reshape(-1, recon_x.size(-1))
    BCE = nn.functional.binary_cross_entropy(recon_x, target, reduction='sum')
    # Closed-form KL divergence between N(mu, exp(logvar)) and N(0, I).
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD

# Build the VAE and move it to the chosen device (nn.Module.to works in place).
vae = VAE(latent_dim=20)
vae.to(device)
print(f"VAE params: {sum(map(torch.numel, vae.parameters())):,}")

## 4. StyleGAN (Pretrained)

In [None]:
# No pretrained StyleGAN weights are loaded in this cell (that requires a
# separate setup); it only prints a summary of the architecture's key ideas.
# The prints are deliberately not wrapped in a bare ``try/except: pass``,
# which could only hide errors.
# The heading says "StyleGAN" rather than "StyleGAN2": progressive growing and
# AdaIN are features of the original StyleGAN, and StyleGAN2 replaced both.
print("StyleGAN key concepts:")
print("  - Progressive growing for stability")
print("  - Style-based synthesis")
print("  - Adaptive instance normalization (AdaIN)")
print("  - Latent space manipulation for editing")

# Latent space interpolation demo
def interpolate_latent(generator, z1, z2, steps=10):
    """Linearly interpolate between two latent vectors and decode each point.

    Args:
        generator: Callable mapping a latent tensor to an image tensor. If it
            is an ``nn.Module`` it is switched to eval mode for the duration
            of the call and every submodule's previous mode is restored
            afterwards.
        z1: Latent tensor at the start of the path (``alpha = 0``).
        z2: Latent tensor at the end of the path (``alpha = 1``).
        steps: Number of evenly spaced points, endpoints included.

    Returns:
        A list with ``steps`` generator outputs, computed without gradients.
    """
    # BatchNorm1d raises on a batch of one while in training mode, and a
    # training-mode forward pass would also update its running statistics.
    # Inference therefore has to run in eval mode.
    is_module = isinstance(generator, nn.Module)
    saved_modes = [(m, m.training) for m in generator.modules()] if is_module else []
    if is_module:
        generator.eval()
    try:
        with torch.no_grad():
            # float() turns NumPy scalars into plain Python floats so the
            # arithmetic is dispatched by the tensors alone.
            return [
                generator((1.0 - alpha) * z1 + alpha * z2)
                for alpha in map(float, np.linspace(0, 1, steps))
            ]
    finally:
        # Restore exactly the modes found on entry, even if generation failed.
        for module, mode in saved_modes:
            module.training = mode

# Demo interpolation
# Demo interpolation: draw two random latent endpoints (z1 first, then z2)
# and decode five points along the line between them.
z1, z2 = (torch.randn(1, latent_dim).to(device) for _ in range(2))
interp_imgs = interpolate_latent(generator, z1, z2, steps=5)
print(f"Generated {len(interp_imgs)} interpolated images")

## 5. Model Comparison

In [None]:
import pandas as pd

# Summary table of the model families discussed in this notebook.
# For FID, lower is better (hence the arrow in the column name).
comparison = pd.DataFrame({
    'Model': ['GAN', 'DCGAN', 'VAE', 'StyleGAN2', 'Diffusion'],
    'Quality (FID↓)': [50, 30, 70, 3, 2],
    'Training': ['Hard', 'Medium', 'Easy', 'Hard', 'Medium'],
    'Latent Control': ['Low', 'Medium', 'High', 'Very High', 'Medium'],
    'Best For': ['Basic', 'Images', 'Latent Space', 'Faces', 'Quality']
})

print("📊 Generative Model Comparison:")
try:
    # ``display`` is provided by IPython/Jupyter and renders a rich table.
    display(comparison)
except NameError:
    # Plain Python has no ``display``; fall back to a text table.
    print(comparison.to_string(index=False))

## 6. Applications

In [None]:
# Example use cases, one "<emoji> <area> - <description>" string per entry.
applications = [
    "🎨 Art Generation - Create unique artwork",
    "📸 Data Augmentation - Generate training data",
    "👤 Face Generation - Synthetic avatars",
    "🔄 Image-to-Image - Style transfer",
    "🎮 Game Assets - Procedural content",
    "👗 Fashion - Virtual try-on",
]

print("🚀 GAN Applications:")
for app in applications:
    print(f"  {app}")

## 🎯 Key Takeaways
1. GANs: adversarial training (generator vs discriminator)
2. VAE: smooth latent space, explicit distribution
3. StyleGAN: style-based control, high quality
4. Diffusion models: new SOTA for quality

## 📚 Further Reading
- Goodfellow et al., "Generative Adversarial Networks" (2014)
- Kingma & Welling, "Auto-Encoding Variational Bayes" (2013)
- Karras et al., "A Style-Based Generator Architecture" (2019)