In [407]:
import torch as t
import torch.nn as nn
import torch.functional as F
import torch.optim as optim
from collections import OrderedDict
from einops.layers.torch import Rearrange, Reduce

In [408]:
class Generator(nn.Module):

    def __init__(
        self,
        latent_dim_size: int = 100,           # size of the random vector we use for generating outputs
        img_size: int = 64,                  # size of the images we're generating
        img_channels: int = 3,              # indicates RGB images
        generator_num_features: int = 1024,    # number of channels after first projection and reshaping
        n_layers: int = 4,                  # number of CONV_n layers
    ):
        super().__init__()
        self.latent_dim_size = latent_dim_size
        self.img_size = img_size
        self.img_channels = img_channels
        self.generator_num_features = generator_num_features
        self.n_layers = n_layers
        first_height = img_size // (2 ** n_layers)
        linear1 = nn.Linear(latent_dim_size, generator_num_features * first_height * first_height, bias=False)
        channel_sizes = [img_size * 2 ** i for i in range(n_layers, 0, -1)] + [img_channels]
        print(channel_sizes)
        mods = nn.Sequential(OrderedDict([
            ("linear1", linear1),
            ("unflatten", Rearrange("b (ic h w) -> b ic h w", h=first_height, w=first_height)),
            ("batchnorm1", nn.BatchNorm2d(generator_num_features)),
            ("relu1", nn.ReLU())
        ]))
        print(f"c_sizes: {channel_sizes}")
        double_conv = lambda in_channels, out_channels: nn.ConvTranspose2d(
            in_channels=in_channels, 
            out_channels=out_channels, 
            kernel_size=4, 
            stride=2, 
            padding=1,
            bias=False
        )
        conv_layers = [double_conv(channel_sizes[i], channel_sizes[i + 1]) for i in range(len(channel_sizes) - 1)]
        # print(f"conv_layers: {conv_layers}")
        for i, conv_layer in enumerate(conv_layers):
            layer_n = i + 2
            if i == len(conv_layers) - 1:
                mods.add_module(f"conv{layer_n}", conv_layer)
                mods.add_module(f"tanh{layer_n}", nn.Tanh())
            else:
                mods.add_module(f"conv{layer_n}", conv_layer)
                mods.add_module(f"batchnorm{layer_n}", nn.BatchNorm2d(channel_sizes[i + 1]))
                mods.add_module(f"relu{layer_n}", nn.ReLU())
        self.generator = nn.Sequential(mods)

    def forward(self, x: t.Tensor):
        self.generator(x)

In [409]:
Generator()

[1024, 512, 256, 128, 3]
c_sizes: [1024, 512, 256, 128, 3]


Generator(
  (generator): Sequential(
    (0): Sequential(
      (linear1): Linear(in_features=100, out_features=16384, bias=False)
      (unflatten): Rearrange('b (ic h w) -> b ic h w', h=4, w=4)
      (batchnorm1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu1): ReLU()
      (conv2): ConvTranspose2d(1024, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
      (batchnorm2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu2): ReLU()
      (conv3): ConvTranspose2d(512, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
      (batchnorm3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu3): ReLU()
      (conv4): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
      (batchnorm4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu4): ReLU

In [410]:



class Discriminator(nn.Module):

    def __init__(
        self,
        img_size: int = 64,
        img_channels: int = 3,
        generator_num_features: int = 1024,
        n_layers: int = 4
    ):
        super().__init__()
        self.img_size = img_size
        self.img_channels = img_channels
        self.generator_num_features = generator_num_features
        self.n_layers = n_layers
        half_conv = lambda in_channels, out_channels: nn.Conv2d(
            in_channels=in_channels, 
            out_channels=out_channels, 
            kernel_size=4, 
            stride=2, 
            padding=1,
            bias=False
        )
        # generator_num_features for DCGAN starts with 128
        n_conv = n_layers + 1
        channel_sizes = [img_channels] + [img_size * (2 ** i) for i in range(1, n_conv+1)]
        print(f"channel_sizes: {channel_sizes}")
        print([(channel_sizes[i], channel_sizes[i + 1]) for i in range(n_conv-2)])
        conv_layers = [half_conv(channel_sizes[i], channel_sizes[i + 1]) for i in range(n_conv)]
        convs = [(f'conv{i}' , conv_layers[i]) for i in range(0, n_conv)]
        mods = []
        for i, conv in enumerate(convs):
            if i == 0:
                mods.append(conv)
                mods.append((f"relu{i}", nn.LeakyReLU(0.2)))
            elif i == len(convs) - 1:
                mods.append(conv)
                mods.append(("flatten", nn.Flatten()))
                mods.append(("linear", nn.Linear(generator_num_features * 4 * 4, 1)))
                mods.append(("sigmoid", nn.Sigmoid()))
            else:
                mods.append(conv)
                mods.append((f"batchnorm{i}", nn.BatchNorm2d(channel_sizes[i+1])))
                mods.append((f"relu{i}", nn.LeakyReLU(0.2)))
        self.discriminator = t.nn.Sequential(OrderedDict(mods))

    def forward(self, x: t.Tensor):
        return self.discriminator(x)
        
class DCGAN(nn.Module):
    netD: Discriminator
    netG: Generator

discriminator = Discriminator()

channel_sizes: [3, 128, 256, 512, 1024, 2048]
[(3, 128), (128, 256), (256, 512)]


In [411]:
celeba_config = dict(
    latent_dim_size = 100,
    img_size = 64,
    img_channels = 3,
    generator_num_features = 1024,
    n_layers = 4,
)

In [412]:
from dataclasses import dataclass
from torchvision import transforms, datasets
from typing import Tuple

@dataclass
class DCGANargs():
    latent_dim_size: int
    img_size: int
    img_channels: int
    generator_num_features: int
    n_layers: int
    trainset: datasets.ImageFolder
    batch_size: int = 8
    epochs: int = 1
    lr: float = 0.0002
    betas: Tuple[float] = (0.5, 0.999)
    track: bool = True
    cuda: bool = True
    seconds_between_image_logs: int = 10

def train_DCGAN(args: DCGANargs) -> DCGAN:
    pass

In [413]:
import importlib


import w5d1_solutions

importlib.reload(w5d1_solutions)
from w5d1_utils import print_param_count
generator = Generator()
print_param_count(generator, w5d1_solutions.celeb_DCGAN.netG)


[1024, 512, 256, 128, 3]
c_sizes: [1024, 512, 256, 128, 3]
Model 1, total params = 12658432


Unnamed: 0,name_1,shape_1,num_params_1
0,generator.0.linear1.weight,"(16384, 100)",1638400
1,generator.0.batchnorm1.weight,"(1024,)",1024
2,generator.0.batchnorm1.bias,"(1024,)",1024
3,generator.0.conv2.weight,"(1024, 512, 4, 4)",8388608
4,generator.0.batchnorm2.weight,"(512,)",512
5,generator.0.batchnorm2.bias,"(512,)",512
6,generator.0.conv3.weight,"(512, 256, 4, 4)",2097152
7,generator.0.batchnorm3.weight,"(256,)",256
8,generator.0.batchnorm3.bias,"(256,)",256
9,generator.0.conv4.weight,"(256, 128, 4, 4)",524288


Model 2, total params = 12658432


Unnamed: 0,num_params_2,shape_2,name_2
0,1638400,"(16384, 100)",project_and_reshape.0.weight
1,1024,"(1024,)",project_and_reshape.2.weight
2,1024,"(1024,)",project_and_reshape.2.bias
3,8388608,"(1024, 512, 4, 4)",layers.0.0.weight
4,512,"(512,)",layers.0.1.weight
5,512,"(512,)",layers.0.1.bias
6,2097152,"(512, 256, 4, 4)",layers.1.0.weight
7,256,"(256,)",layers.1.1.weight
8,256,"(256,)",layers.1.1.bias
9,524288,"(256, 128, 4, 4)",layers.2.0.weight


All parameter counts match!


Unnamed: 0,name_1,shape_1,num_params_1,num_params_2,shape_2,name_2
0,generator.0.linear1.weight,"(16384, 100)",1638400,1638400,"(16384, 100)",project_and_reshape.0.weight
1,generator.0.batchnorm1.weight,"(1024,)",1024,1024,"(1024,)",project_and_reshape.2.weight
2,generator.0.batchnorm1.bias,"(1024,)",1024,1024,"(1024,)",project_and_reshape.2.bias
3,generator.0.conv2.weight,"(1024, 512, 4, 4)",8388608,8388608,"(1024, 512, 4, 4)",layers.0.0.weight
4,generator.0.batchnorm2.weight,"(512,)",512,512,"(512,)",layers.0.1.weight
5,generator.0.batchnorm2.bias,"(512,)",512,512,"(512,)",layers.0.1.bias
6,generator.0.conv3.weight,"(512, 256, 4, 4)",2097152,2097152,"(512, 256, 4, 4)",layers.1.0.weight
7,generator.0.batchnorm3.weight,"(256,)",256,256,"(256,)",layers.1.1.weight
8,generator.0.batchnorm3.bias,"(256,)",256,256,"(256,)",layers.1.1.bias
9,generator.0.conv4.weight,"(256, 128, 4, 4)",524288,524288,"(256, 128, 4, 4)",layers.2.0.weight


In [414]:
discriminator = Discriminator()
print_param_count(discriminator, w5d1_solutions.celeb_DCGAN.netD)

channel_sizes: [3, 128, 256, 512, 1024, 2048]
[(3, 128), (128, 256), (256, 512)]
Model 1, total params = 44590593


Unnamed: 0,name_1,shape_1,num_params_1
0,discriminator.conv0.weight,"(128, 3, 4, 4)",6144
1,discriminator.conv1.weight,"(256, 128, 4, 4)",524288
2,discriminator.batchnorm1.weight,"(256,)",256
3,discriminator.batchnorm1.bias,"(256,)",256
4,discriminator.conv2.weight,"(512, 256, 4, 4)",2097152
5,discriminator.batchnorm2.weight,"(512,)",512
6,discriminator.batchnorm2.bias,"(512,)",512
7,discriminator.conv3.weight,"(1024, 512, 4, 4)",8388608
8,discriminator.batchnorm3.weight,"(1024,)",1024
9,discriminator.batchnorm3.bias,"(1024,)",1024


Model 2, total params = 11036160


Unnamed: 0,num_params_2,shape_2,name_2
0,6144,"(128, 3, 4, 4)",layers.0.0.weight
1,524288,"(256, 128, 4, 4)",layers.1.0.weight
2,256,"(256,)",layers.1.1.weight
3,256,"(256,)",layers.1.1.bias
4,2097152,"(512, 256, 4, 4)",layers.2.0.weight
5,512,"(512,)",layers.2.1.weight
6,512,"(512,)",layers.2.1.bias
7,8388608,"(1024, 512, 4, 4)",layers.3.0.weight
8,1024,"(1024,)",layers.3.1.weight
9,1024,"(1024,)",layers.3.1.bias


Parameter counts don't match up exactly.


Unnamed: 0,name_1,shape_1,num_params_1,num_params_2,shape_2,name_2
0,discriminator.conv0.weight,"(128, 3, 4, 4)",6144,6144,"(128, 3, 4, 4)",layers.0.0.weight
1,discriminator.conv1.weight,"(256, 128, 4, 4)",524288,524288,"(256, 128, 4, 4)",layers.1.0.weight
2,discriminator.batchnorm1.weight,"(256,)",256,256,"(256,)",layers.1.1.weight
3,discriminator.batchnorm1.bias,"(256,)",256,256,"(256,)",layers.1.1.bias
4,discriminator.conv2.weight,"(512, 256, 4, 4)",2097152,2097152,"(512, 256, 4, 4)",layers.2.0.weight
5,discriminator.batchnorm2.weight,"(512,)",512,512,"(512,)",layers.2.1.weight
6,discriminator.batchnorm2.bias,"(512,)",512,512,"(512,)",layers.2.1.bias
7,discriminator.conv3.weight,"(1024, 512, 4, 4)",8388608,8388608,"(1024, 512, 4, 4)",layers.3.0.weight
8,discriminator.batchnorm3.weight,"(1024,)",1024,1024,"(1024,)",layers.3.1.weight
9,discriminator.batchnorm3.bias,"(1024,)",1024,1024,"(1024,)",layers.3.1.bias


In [415]:
from torchvision import transforms, datasets

transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

trainset = ImageFolder(
    root="data",
    transform=transform
)

w5d1_utils.show_images(trainset, rows=3, cols=5)

NameError: name 'image_size' is not defined