## Pix2pixHD

### Model summary

In [1]:
from pytorch_lightning.trainer import Trainer
from models.pix2pixHD_model import BaseOptions, Pix2PixHDModel
from torchsummary import summary
import torch
from pytorch_lightning.loggers import TensorBoardLogger
from pathlib import Path
from time import time

In [57]:
opt = {
    "gpu_ids": [0],
    "netG": 'global',
    "ngf": 64,
    "num_upsampling_layers": "normal",
    "crop_size": 512,
    "aspect_ratio": 1.0,
    "use_vae": "store_true",
    "z_dim": 256,
    "norm_G": "spectralspadesyncbatch3x3",
    "norm_D": "spectralinstance",
    "norm_E": "spectralinstance",
    "label_nc": 182,
    "contain_dontcare_label": "store_true",
    "output_nc": 3,
    "no_instance": "store_true",
    "init_type": "xavier",
    "init_variance": 0.02,
    "isTrain": True,
    "which_epoch": "latest",
    "checkpoints_dir": './checkpoints',
    "name": 'cityscapes_pretrained',
    "netD": 'multiscale',
    "num_D": 2,
    "netD_subarch": 'n_layer',
    "ndf": 64,
    "n_layers_D": 4,
    "continue_train": False,
    "gan_mode": 'hinge',
    "no_vgg_loss": False,
    "norm": "instance",
    "n_downsample_global": 4,
    "n_blocks_global": 9,
    "n_blocks_local": 3,
    "n_local_enhancers": 1,
    "no_lsgan": True,
    "no_ganFeat_loss": True,
    "feat_num": 3,
    "nef": 16,
    "n_downsample_E": 4,
}

In [58]:
opt = BaseOptions(**opt)

In [33]:
import models.networks.pix2pxhd_nets as networks

In [34]:
netG_input_nc = 3

In [35]:
netG = networks.define_G(netG_input_nc, opt.output_nc, opt.ngf, 
                         opt.netG, opt.n_downsample_global, opt.n_blocks_global, opt.n_local_enhancers, opt.n_blocks_local, 
                         opt.norm, gpu_ids=opt.gpu_ids)

GlobalGenerator(
  (model): Sequential(
    (0): ReflectionPad2d((3, 3, 3, 3))
    (1): Conv2d(3, 64, kernel_size=(7, 7), stride=(1, 1))
    (2): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (3): ReLU(inplace=True)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (5): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (8): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (11): InstanceNorm2d(512, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (12): ReLU(inplace=True)
    (13): Conv2d(512, 1024, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (14): InstanceNorm2d(1024, eps=1e-05, momentum=0.1, affi

In [37]:
summary(netG, input_size=(3, 256, 256))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
   ReflectionPad2d-1          [-1, 3, 262, 262]               0
            Conv2d-2         [-1, 64, 256, 256]           9,472
    InstanceNorm2d-3         [-1, 64, 256, 256]               0
              ReLU-4         [-1, 64, 256, 256]               0
              ReLU-5         [-1, 64, 256, 256]               0
              ReLU-6         [-1, 64, 256, 256]               0
              ReLU-7         [-1, 64, 256, 256]               0
              ReLU-8         [-1, 64, 256, 256]               0
              ReLU-9         [-1, 64, 256, 256]               0
             ReLU-10         [-1, 64, 256, 256]               0
             ReLU-11         [-1, 64, 256, 256]               0
             ReLU-12         [-1, 64, 256, 256]               0
             ReLU-13         [-1, 64, 256, 256]               0
           Conv2d-14        [-1, 128, 1

In [46]:
netD_input_nc = 6
use_sigmoid = opt.no_lsgan

In [47]:
netD = networks.define_D(netD_input_nc, opt.ndf, opt.n_layers_D, opt.norm, use_sigmoid, opt.num_D, not opt.no_ganFeat_loss, gpu_ids=opt.gpu_ids)

MultiscaleDiscriminator(
  (layer0): Sequential(
    (0): Conv2d(6, 64, kernel_size=(4, 4), stride=(2, 2), padding=(2, 2))
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(2, 2))
    (3): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (4): LeakyReLU(negative_slope=0.2, inplace=True)
    (5): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(2, 2))
    (6): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (7): LeakyReLU(negative_slope=0.2, inplace=True)
    (8): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(2, 2))
    (9): InstanceNorm2d(512, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (10): LeakyReLU(negative_slope=0.2, inplace=True)
    (11): Conv2d(512, 512, kernel_size=(4, 4), stride=(1, 1), padding=(2, 2))
    (12): InstanceNorm2d(512, eps=1e-05, momentum=0.1, affine=False, tr

In [49]:
summary(netD, input_size=(6,256,256))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 129, 129]           6,208
         LeakyReLU-2         [-1, 64, 129, 129]               0
            Conv2d-3          [-1, 128, 65, 65]         131,200
    InstanceNorm2d-4          [-1, 128, 65, 65]               0
         LeakyReLU-5          [-1, 128, 65, 65]               0
            Conv2d-6          [-1, 256, 33, 33]         524,544
    InstanceNorm2d-7          [-1, 256, 33, 33]               0
         LeakyReLU-8          [-1, 256, 33, 33]               0
            Conv2d-9          [-1, 512, 17, 17]       2,097,664
   InstanceNorm2d-10          [-1, 512, 17, 17]               0
        LeakyReLU-11          [-1, 512, 17, 17]               0
           Conv2d-12          [-1, 512, 18, 18]       4,194,816
   InstanceNorm2d-13          [-1, 512, 18, 18]               0
        LeakyReLU-14          [-1, 512,

In [61]:
netE = networks.define_G(opt.output_nc, opt.feat_num, opt.nef, "encoder", opt.n_downsample_E, norm=opt.norm, gpu_ids=opt.gpu_ids)

Encoder(
  (model): Sequential(
    (0): ReflectionPad2d((3, 3, 3, 3))
    (1): Conv2d(3, 16, kernel_size=(7, 7), stride=(1, 1))
    (2): InstanceNorm2d(16, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (3): ReLU(inplace=True)
    (4): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (5): InstanceNorm2d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (6): ReLU(inplace=True)
    (7): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (8): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (9): ReLU(inplace=True)
    (10): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (11): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
    (12): ReLU(inplace=True)
    (13): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (14): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=False, track_

In [63]:
summary(netD, input_size=(6,256,256))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 129, 129]           6,208
         LeakyReLU-2         [-1, 64, 129, 129]               0
            Conv2d-3          [-1, 128, 65, 65]         131,200
    InstanceNorm2d-4          [-1, 128, 65, 65]               0
         LeakyReLU-5          [-1, 128, 65, 65]               0
            Conv2d-6          [-1, 256, 33, 33]         524,544
    InstanceNorm2d-7          [-1, 256, 33, 33]               0
         LeakyReLU-8          [-1, 256, 33, 33]               0
            Conv2d-9          [-1, 512, 17, 17]       2,097,664
   InstanceNorm2d-10          [-1, 512, 17, 17]               0
        LeakyReLU-11          [-1, 512, 17, 17]               0
           Conv2d-12          [-1, 512, 18, 18]       4,194,816
   InstanceNorm2d-13          [-1, 512, 18, 18]               0
        LeakyReLU-14          [-1, 512,