In [95]:
import torch as t
from typing import Union
from torch import nn
import torch.nn.functional as F
import torchinfo
import plotly.express as px
import plotly.graph_objects as go
from einops import rearrange, reduce, repeat
from einops.layers.torch import Rearrange
from fancy_einsum import einsum
import os
from tqdm.auto import tqdm
from torchvision import transforms, datasets
from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset, DataLoader, TensorDataset
import wandb
import w5d1_utils
import w5d1_tests
from dataclasses import dataclass
import pytorch_lightning as pl

#import sys 
#sys.path.append('../common_modules')


In [83]:
def build_convtranspose_layers(
    n_layers,
    in_channels,
    out_channels,
    kernel_size=4,
    stride=2,
    padding=1,
    batch_norm=True,
    activation=nn.ReLU(),
):
    """Builds a sequence of convolutional transpose layers with optional batch 
        normalization and activation layers.

    Args:
        n_layers (int): Number of convolutional transpose layers to build.
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        kernel_size (int): Size of the convolutional kernel.
        stride (int): Stride of the convolution.
        padding (int): Padding of the convolution.
        output_padding (int): Output padding of the convolution.
        batch_norm (bool): Whether to use batch normalization layers.
        activation (nn.Module): Activation layer to use.

    Returns:
        nn.Sequential: A sequence of convolutional transpose layers.
    """
    layers = []
    in_channels = [in_channels // (2**i) for i in range(n_layers)]
    out_channels = in_channels[1:] + [3]
    for i in range(n_layers - 1):
        layers.append(
            nn.ConvTranspose2d(
                in_channels=in_channels[i],
                out_channels=out_channels[i],
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                bias=False,
            )
        )
        if batch_norm:
            layers.append(nn.BatchNorm2d(out_channels[i]))
        if activation is not None:
            layers.append(activation)

    # Last layer
    layers.append(
        nn.ConvTranspose2d(
            in_channels=in_channels[-1],
            out_channels=out_channels[-1],
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
        )
    )
    layers.append(nn.Tanh())

    return nn.Sequential(*layers)


In [84]:
def build_conv_layers(
    n_layers,
    img_channels,
    generator_channels,
    kernel_size=4,
    stride=2,
    padding=1,
    batch_norm=True,
    activation=nn.LeakyReLU(0.2),
):
    """Builds a sequence of convolutional layers with optional batch 
        normalization and activation layers.

    Args:
        n_layers (int): Number of transpose layers to build.
        kernel_size (int): Size of the convolutional kernel.
        stride (int): Stride of the convolution.
        padding (int): Padding of the convolution.
        output_padding (int): Output padding of the convolution.
        batch_norm (bool): Whether to use batch normalization layers.
        activation (nn.Module): Activation layer to use.

    Returns:
        nn.Sequential: A sequence of convolutional transpose layers.
    """
    layers = []

    out_channels = [generator_channels // (2**i) for i in range(n_layers)][::-1]
    in_channels = ([img_channels] + out_channels[:-1])
    # First layer
    layers.append(
        nn.Conv2d(
            in_channels=in_channels[0],
            out_channels=out_channels[0],
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
        )
    )
    layers.append(activation)
    
    for i in range(1, n_layers):
        layers.append(
            nn.Conv2d(
                in_channels=in_channels[i],
                out_channels=out_channels[i],
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                bias=False
            )
        )
        if batch_norm:
            layers.append(nn.BatchNorm2d(out_channels[i]))
        if activation is not None:
            layers.append(activation)

    return nn.Sequential(*layers)

def test_build_convtranspose_layers():
    """Test function for build_convtranspose_layers."""
    n_layers = 3
    in_channels = 128
    out_channels = 3
    kernel_size = 4
    stride = 2
    padding = 1
    output_padding = 0
    batch_norm = True
    activation = nn.ReLU()
    layers = build_convtranspose_layers(
        n_layers,
        in_channels,
        out_channels,
        kernel_size,
        stride,
        padding,
        output_padding,
        batch_norm,
        activation,
    )
    assert len(layers) == 2 * n_layers + 1
    assert isinstance(layers[0], nn.ConvTranspose2d)
    assert isinstance(layers[1], nn.BatchNorm2d)
    assert isinstance(layers[2], nn.ReLU)
    assert layers[0].in_channels == in_channels
    assert layers[0].out_channels == out_channels
    assert layers[0].kernel_size == (kernel_size, kernel_size)
    assert layers[0].stride == (stride, stride)
    assert layers[0].padding == (padding, padding)
    assert layers[0].output_padding == (output_padding, output_padding)
    assert layers[1].num_features == out_channels
    assert isinstance(layers[-1], nn.Tanh)
    print("Success!")




In [69]:
# Write a test for build_convtranspose_layers



In [85]:
def initialize_weights(model) -> None:
    """Initializes the weights of the model.

    Args:
        model (nn.Module): Model to initialize.
    """
    for m in model.modules():
        if isinstance(m, nn.ConvTranspose2d) or isinstance(m, nn.Conv2d):
            nn.init.normal_(m.weight, 0.0, 0.02)
        elif isinstance(m, nn.BatchNorm2d):
            nn.init.normal_(m.weight, 1.0, 0.02)
            nn.init.constant_(m.bias, 0)

In [120]:
class Generator(nn.Module):
    def __init__(
        self,
        latent_dim_size: int,
        img_size: int,
        img_channels: int,
        generator_num_features: int,
        n_layers: int,
        scale_factor: int = 2,
    ):
        """Implementation of DCGAN 
generator

        Args:
            self (Generator): Module
            latent_dim_size (int): size of the random vector we use for generating output
            img_size (int): size of the images we're generating
            img_channels (int): indicates RGB images
            generator_num_features (int): number of channels after first projection and reshaping
            n_layers (int): number of CONV_n layers
            scale_factor (int): scale factor for upsampling
        """
        super().__init__()
        self.latent_dim_size = latent_dim_size
        self.img_size = img_size
        self.img_channels = img_channels
        self.generator_num_features = generator_num_features
        self.n_layers = n_layers
        print(f"{img_size=} {img_channels=} {generator_num_features=} {n_layers=}")
        #assert img_size % (scale_factor**self.n_layers) == 0
        self.initial = nn.Sequential(
            nn.Linear(
                latent_dim_size,
                generator_num_features * (img_size // scale_factor**self.n_layers) ** 2,
            ),
            Rearrange(
                "b (c h w) -> b c h w", h=img_size // scale_factor**self.n_layers,
                w=img_size // scale_factor**self.n_layers
            ),
            nn.BatchNorm2d(generator_num_features),
            nn.ReLU(),
        )

        self.layers = build_convtranspose_layers(
            n_layers,
            in_channels=generator_num_features,
            out_channels=generator_num_features // 2,
            kernel_size=4,
            stride=2,
            padding=1,
            batch_norm=True,
            activation=nn.ReLU(),
        )

    def forward(self, x: t.Tensor):
        """Forward pass of the 
generator

        Args:
            self (Generator): Module
            x (t.Tensor): input tensor

        Returns:
            t.Tensor: generated image
        """
        x = self.initial(x)
        x = self.layers(x)
        return x


class Discriminator(nn.Module):
    def __init__(
        self,
        img_size: int,
        img_channels: int,
        generator_num_features: int,
        n_layers: int,
    ):
        """Implementation of DCGAN discriminator

        Args:
            img_size (int): _description_
            img_channels (int): _description_
            generator_num_features (int): _description_
            n_layers (int): _description_
        """        
        super().__init__()
        self.img_size = img_size
        self.img_channels = img_channels
        self.generator_num_features = generator_num_features
        self.n_layers = n_layers

        self.layers = build_conv_layers(n_layers, img_channels, generator_num_features)
        self.flattened = Rearrange("b c h w -> b (c h w)")
        self.classifier = nn.Linear(generator_num_features * (img_size // 2**n_layers)**2, 1)

    def forward(self, x: t.Tensor):
        """Forward pass of the discriminator

        Args:
            x (t.Tensor): input tensor

        Returns:
            t.Tensor: output tensor
        """
        x = self.layers(x)
        x = self.flattened(x)
        x = self.classifier(x)
        return x


class DCGAN(nn.Module):
    """Implementation of DCGAN

    Args:
        nn (nn.Module): Parent class
    """    
    netD: Discriminator
    netG: Generator
    def __init__(self, args):
        """_summary_
        """        
        super().__init__()
        self.netG = Generator(
            latent_dim_size=args.latent_dim_size,
            img_size=args.img_size,
            img_channels=args.img_channels,
            generator_num_features=args.generator_num_features,
            n_layers=args.n_layers,
        )
        self.netD = Discriminator(
            img_size=args.img_size,
            img_channels=args.img_channels,
            generator_num_features=args.generator_num_features,
            n_layers=args.n_layers,
        )
        initialize_weights(self.netD)
        initialize_weights(self.netG)


In [111]:
# from w5d1_solutions import celeb_DCGAN
# gen_net = DCGAN(**celeba_config)
# w5d1_utils.print_param_count(gen_net.netD, celeb_DCGAN.netD)

TypeError: __init__() got an unexpected keyword argument 'latent_dim_size'

In [117]:
from torchvision import transforms, datasets
image_size = 64
batch_size = 128
transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

trainset = ImageFolder(
    root="/home/curttigges/projects/arena-v1-ldn-ct/w5_chapter5_modelling_objectives/data",
    transform=transform
)

w5d1_utils.show_images(trainset, rows=3, cols=5)

In [None]:
@dataclass
class DCGANargs():
    latent_dim_size: int
    img_size: int
    img_channels: int
    generator_num_features: int
    n_layers: int
    trainset: datasets.ImageFolder
    batch_size: int = 8
    epochs: int = 1
    lr: float = 0.0002
    betas: Tuple[float] = (0.5, 0.999)
    track: bool = True
    cuda: bool = False
    seconds_between_image_logs: int = 40

In [125]:
import time
from typing import Tuple


@dataclass
class DCGANargs():
    latent_dim_size: int
    img_size: int
    img_channels: int
    generator_num_features: int
    n_layers: int
    trainset: datasets.ImageFolder
    batch_size: int = 8
    epochs: int = 1
    lr: float = 0.0002
    betas: Tuple[float] = (0.5, 0.999)
    track: bool = True
    cuda: bool = False
    seconds_between_image_logs: int = 40

def train_DCGAN(args: DCGANargs) -> DCGAN:

    last_log_time = time.time()
    n_examples_seen = 0

    device = t.device("cuda" if args.cuda else "cpu")

    trainloader = DataLoader(trainset, batch_size=args.batch_size, shuffle=True) # num_workers=2

    model = DCGAN(args).to(device).train()

    if args.track:
        wandb.init()
        wandb.watch(model)
    
    for epoch in range(args.epochs):
        
        progress_bar = tqdm(trainloader)

        optG = t.optim.Adam(model.netG.parameters(), lr=args.lr, betas=args.betas)
        optD = t.optim.Adam(model.netD.parameters(), lr=args.lr, betas=args.betas)

        for img_real, label in progress_bar: # remember that label is not used

            img_real = img_real.to(device)
            noise = t.randn(args.batch_size, model.netG.latent_dim_size).to(device)

            # ====== DISCRIMINIATOR TRAINING LOOP: maximise log(D(x)) + log(1-D(G(z))) ======

            # Zero gradients
            optD.zero_grad()
            # Calculate the two different components of the objective function
            D_x = model.netD(img_real)
            img_fake = model.netG(noise)
            D_G_z = model.netD(img_fake.detach())
            # Add them to get the objective function
            lossD = - (t.log(D_x).mean() + t.log(1 - D_G_z).mean())
            # Gradient descent step
            lossD.backward()
            optD.step()

            # ====== GENERATOR TRAINING LOOP: maximise log(D(G(z))) ======
            
            # Zero gradients
            optG.zero_grad()
            # Calculate the objective function
            D_G_z = model.netD(img_fake)
            lossG = - (t.log(D_G_z).mean())
            # Gradient descent step
            lossG.backward()
            optG.step()

            # Update progress bar
            progress_bar.set_description(f"{epoch=}, lossD={lossD.item():.4f}, lossG={lossG.item():.4f}")
            n_examples_seen += img_real.shape[0]

            # Log output, if required
            if args.track:
                wandb.log(dict(lossD=lossD, lossG=lossG), step=n_examples_seen)
                if time.time() - last_log_time > args.seconds_between_image_logs:
                    last_log_time = time.time()
                    arrays = get_generator_output(model.netG) # shape (8, 64, 64, 3)
                    images = [wandb.Image(arr) for arr in arrays]
                    wandb.log({"images": images}, step=n_examples_seen)

    name = model.__class__.__name__
    dirname = str(wandb.run.dir) if args.track else "models"
    filename = f"{dirname}/{name}.pt"
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    if args.track:
        print(f"Saving {name!r} to: {filename!r}")
        wandb.save(filename)
        wandb.finish()
                
    return model

@t.inference_mode()
def get_generator_output(netG, n_examples=8, rand_seed=0):
    netG.eval()
    device = next(netG.parameters()).device
    t.manual_seed(rand_seed)
    noise = t.randn(n_examples, netG.latent_dim_size).to(device)
    arrays = rearrange(netG(noise), "b c h w -> b h w c").detach().cpu().numpy()
    netG.train()
    return arrays

In [126]:
celeba_config = dict(
    latent_dim_size = 100,
    img_size = 64,
    img_channels = 3,
    generator_num_features = 1024,
    n_layers = 4,
)
celeba_mini_config = dict(
    latent_dim_size = 100,
    img_size = 64,
    img_channels = 3,
    generator_num_features = 512,
    n_layers = 4,
)

device = t.device("cuda" if t.cuda.is_available() else "cpu")
args = DCGANargs(**celeba_mini_config, trainset=trainset)
model = DCGAN(args).to(device).train()
#print_param_count(model)
x = t.randn(3, 100).to(device)
statsG = torchinfo.summary(model.netG, input_data=x)
statsD = torchinfo.summary(model.netD, input_data=model.netG(x))
print(statsG, statsD)



model = train_DCGAN(args)

img_size=64 img_channels=3 generator_num_features=512 n_layers=4
Layer (type:depth-idx)                   Output Shape              Param #
Generator                                [3, 3, 64, 64]            --
├─Sequential: 1-1                        [3, 512, 4, 4]            --
│    └─Linear: 2-1                       [3, 8192]                 827,392
│    └─Rearrange: 2-2                    [3, 512, 4, 4]            --
│    └─BatchNorm2d: 2-3                  [3, 512, 4, 4]            1,024
│    └─ReLU: 2-4                         [3, 512, 4, 4]            --
├─Sequential: 1-2                        [3, 3, 64, 64]            --
│    └─ConvTranspose2d: 2-5              [3, 256, 8, 8]            2,097,152
│    └─BatchNorm2d: 2-6                  [3, 256, 8, 8]            512
│    └─ReLU: 2-7                         [3, 256, 8, 8]            --
│    └─ConvTranspose2d: 2-8              [3, 128, 16, 16]          524,288
│    └─BatchNorm2d: 2-9                  [3, 128, 16, 16]          25

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mcurt-tigges[0m ([33marena-ldn[0m). Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/25325 [00:00<?, ?it/s]

KeyboardInterrupt: 

DCGANargs(latent_dim_size=100, img_size=64, img_channels=3, generator_num_features=512, n_layers=4, trainset=Dataset ImageFolder
    Number of datapoints: 202599
    Root location: /home/curttigges/projects/arena-v1-ldn-ct/w5_chapter5_modelling_objectives/data
    StandardTransform
Transform: Compose(
               Resize(size=64, interpolation=bilinear, max_size=None, antialias=None)
               ToTensor()
               Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
           ), batch_size=8, epochs=1, lr=0.0002, betas=(0.5, 0.999), track=True, cuda=True, seconds_between_image_logs=40)
img_size=64 img_channels=3 generator_num_features=512 n_layers=4
