In [2]:
import argparse

import torch
from torchvision.datasets import MNIST
import torchvision.transforms as transforms

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

from lateral_connections import LateralModel, VggModel
from lateral_connections import VggWithLCL
from lateral_connections import MNISTCDataset
from lateral_connections.loaders import get_loaders, load_mnistc
from lateral_connections.character_models import SmallVggWithLCL, VGGReconstructionLCL

import datetime

In [3]:
# Settings used to rebuild the network before its stored weights are restored.
config = dict(
    num_classes=10,
    learning_rate=1e-3,
    dropout=0.2,
    num_epochs=4,
    batch_size=10,
    use_lcl=True,
    num_multiplex=4,
    lcl_alpha=1e-3,
    lcl_theta=0.2,
    lcl_eta=0.0,
    lcl_iota=0.2,
)

# Checkpoint that is loaded into the freshly constructed model.
model_path = 'models/vgg_with_lcl/VGG19_LCL_2022-04-05_155542__it6250_e1.pt'

# Build the model from the settings above; wandb logging is switched off and no
# run identifier is supplied.
model = VggWithLCL(
    config['num_classes'],
    learning_rate=config['learning_rate'],
    dropout=config['dropout'],
    num_multiplex=config['num_multiplex'],
    do_wandb=False,
    run_identifier="",
    lcl_alpha=config['lcl_alpha'],
    lcl_eta=config['lcl_eta'],
    lcl_theta=config['lcl_theta'],
    lcl_iota=config['lcl_iota'],
)
model.load(model_path)

# Only the third lateral layer is switched on, and only when requested.
if config['use_lcl']:
    model.features.lcl3.enable()

In [4]:
# Display the module tree of the restored network (repr of the cell's last expression).
model

VggWithLCL(
  (features): Sequential(
    (pool1): Sequential(
      (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace=True)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (lcl1): LaterallyConnectedLayer(4, (64, 112, 112), d=2, disabled=True, update=False)
    (pool2): Sequential(
      (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (6): ReLU(inplace=True)
      (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (8): ReLU(inplace=True)
      (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (lcl2): LaterallyConnectedLayer(4, (128, 56, 56), d=2, disabled=True, update=False)
    (pool3): Sequential(
      (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU

### Parameter Sizes

In [4]:
# Report how many parameters each active stage of the network holds and derive
# the VGG19 / VGG19+LCL totals from the model itself, so the figures stay
# correct when the architecture or its configuration changes.
def count_parameters(module):
    """Return the total number of elements over all parameters of ``module``."""
    return sum(p.numel() for p in module.parameters())


# Parameters of every feature stage that is not a LaterallyConnectedLayer.
backbone_params = 0
for layer in model.features:
    layer_name = layer.__class__.__name__
    is_lcl = layer_name == 'LaterallyConnectedLayer'
    if is_lcl and layer.disabled:
        # Disabled lateral layers are left out of the report.
        continue

    layer_params = count_parameters(layer)
    if not is_lcl:
        backbone_params += layer_params
    print(layer_name.ljust(30), layer_params)

classifier_params = count_parameters(model.classifier)
print(model.classifier.__class__.__name__.ljust(30), classifier_params)

# mu, S and M of lcl3 are listed separately and excluded from the LCL total
# below (presumably because they are statistics rather than learned weights —
# TODO confirm against the LaterallyConnectedLayer implementation).
lcl3 = model.features.lcl3
excluded_params = 0
for tensor in (lcl3.mu, lcl3.S, lcl3.M):
    print(tensor.__class__.__name__, tensor.numel())
    excluded_params += tensor.numel()

# `vgg19`: plain VGG19 (feature stages + classifier).
# `lcl`:   VGG19 plus the parameters of lcl3 without mu/S/M, i.e. the total
#          size of the network with the lateral layer.
vgg19 = backbone_params + classifier_params
lcl = vgg19 + count_parameters(lcl3) - excluded_params

print('')
print(f"Vgg19 #Parameter:\t{vgg19}\nVgg19+LCL #Parameter:\t{lcl} ({round(100*lcl/vgg19,2)}%)")

Sequential                     37568
Sequential                     221440
Sequential                     2065408
LaterallyConnectedLayer        26217486
Sequential                     8259584
Sequential                     9439232
Sequential                     119586826
Parameter 1024
Parameter 1024
Parameter 1024

Vgg19 #Parameter:	139610058
Vgg19+LCL #Parameter:	165824472 (118.78%)


In [11]:
from typing import Union, List, Dict, Any, cast
import torch.nn as nn
from torchvision.models.vgg import VGG

# Taken from https://pytorch.org/vision/main/_modules/torchvision/models/vgg.html#vgg19
#
# Layer recipes consumed by `make_layers`: entries are processed in order, an
# int adds a 3x3 convolution with that many output channels (followed by ReLU),
# and "M" adds a 2x2 max-pooling layer.
# The keys "A", "B", "D" and "E" are reported as VGG-11, VGG-13, VGG-16 and
# VGG-19 respectively further down in this notebook.
cfgs: Dict[str, List[Union[str, int]]] = {
    "A": [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "B": [64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "D": [64, 64, "M", 128, 128, "M", 256, 256, 256, "M", 512, 512, 512, "M", 512, 512, 512, "M"],
    "E": [64, 64, "M", 128, 128, "M", 256, 256, 256, 256, "M", 512, 512, 512, 512, "M", 512, 512, 512, 512, "M"],
}

# Taken from https://pytorch.org/vision/main/_modules/torchvision/models/vgg.html#vgg19
#
# Changed so that the number of input channels is configurable and defaults
# to 1 (single-channel images) instead of torchvision's 3.
#
def make_layers(
    cfg: List[Union[str, int]],
    batch_norm: bool = False,
    in_channels: int = 1,
) -> nn.Sequential:
    """Build a VGG-style feature extractor from a layer recipe.

    Args:
        cfg: Layer recipe processed in order. ``"M"`` appends a 2x2 max-pool
            with stride 2; any other entry is taken as the number of output
            channels of a 3x3 convolution with padding 1, followed by ReLU.
        batch_norm: When True, a ``BatchNorm2d`` is inserted between each
            convolution and its ReLU.
        in_channels: Number of channels of the input fed to the first
            convolution. Defaults to 1, which is what this function used
            before the parameter existed.

    Returns:
        An ``nn.Sequential`` holding the layers in recipe order. An empty
        recipe yields an empty ``nn.Sequential``.
    """
    layers: List[nn.Module] = []
    channels = in_channels
    for v in cfg:
        if v == "M":
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue

        out_channels = cast(int, v)
        conv2d = nn.Conv2d(channels, out_channels, kernel_size=3, padding=1)
        layers.append(conv2d)
        if batch_norm:
            layers.append(nn.BatchNorm2d(out_channels))
        layers.append(nn.ReLU(inplace=True))
        # The next convolution consumes what this one produced.
        channels = out_channels
    return nn.Sequential(*layers)

def build_custom_vgg19(dropout, num_classes, config_ident='E'):
    """Create a torchvision ``VGG`` whose classifier is replaced by a custom head.

    Args:
        dropout: Drop probability used by both dropout layers of the head.
        num_classes: Number of outputs of the final linear layer.
        config_ident: Key into ``cfgs`` selecting the feature-extractor recipe
            (defaults to ``'E'``).

    Returns:
        The ``VGG`` instance with features built by ``make_layers`` (without
        batch norm) and the replaced classifier. The head ends in a linear
        layer; no softmax is applied.
    """
    feature_extractor = make_layers(cfgs[config_ident], batch_norm=False)
    net = VGG(feature_extractor)

    hidden_units = 4096
    head = [
        nn.Linear(512 * 7 * 7, hidden_units),
        nn.ReLU(True),
        nn.Dropout(p=dropout),
        nn.Linear(hidden_units, hidden_units),
        nn.ReLU(True),
        nn.Dropout(p=dropout),
        nn.Linear(hidden_units, num_classes),
    ]
    net.classifier = nn.Sequential(*head)
    return net

# Compare the parameter counts of the single-channel VGG variants and show how
# much the lateral layer adds on top of VGG-19.
#
# NOTE: `vgg19` and `lcl` are defined in the "Parameter Sizes" cell, which has
# to be executed before this one.
def _num_params(net):
    """Return the total number of elements over all parameters of ``net``."""
    return sum(p.numel() for p in net.parameters())


VGGA = build_custom_vgg19(0.2, 10, 'A')
print(f"VGG-11: {_num_params(VGGA):,}")

VGGB = build_custom_vgg19(0.2, 10, 'B')
print(f"VGG-13: {_num_params(VGGB):,}")

VGGD = build_custom_vgg19(0.2, 10, 'D')
print(f"VGG-16: {_num_params(VGGD):,}")

VGGE = build_custom_vgg19(0.2, 10, 'E')
# Parameters contributed by the lateral layer: `lcl` is the VGG19+LCL total,
# `vgg19` the plain VGG19 total.
lcl_extra = lcl - vgg19
# Round after forming the percentage so no floating-point noise from a
# subtraction can leak into the printed value.
lcl_extra_percent = round(100 * lcl_extra / vgg19, 2)
print(f"VGG-19: {_num_params(VGGE):,} + LCL: {lcl_extra:,} (+{lcl_extra_percent}%)")




VGG-11: 128,806,154
VGG-13: 128,990,666
VGG-16: 134,300,362
VGG-19: 139,610,058 + LCL: 26,214,414 (+18.78%)
