<a href="https://colab.research.google.com/github/ab7289-tandon-nyu/csgy6953_DeepLearning_Midterm/blob/count_params_visualize_archs/parameters_and_architectures.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install, Import, Input

In [15]:
import torch

## Torchsummary

In [16]:
!pip install torch-summary==1.4.5

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [17]:
from torchsummary import summary

In [18]:
# Passed as `verbose=` to every torchsummary.summary call in this notebook.
# NOTE(review): 0 is presumably the library's "quiet" level (no table printed) — confirm in the torch-summary docs.
QUIET_VERBOSE = 0  # as in torchsummary.summary

In [19]:
def print_layers_by_index(modelstatistics, from_i, to_i, show_input_size = True):
    """Print the table header and a slice of the layer rows of a summary object.

    The layer rows are obtained by splitting ``modelstatistics.layers_to_str()``
    on newlines and are selected with ordinary slice semantics
    (``rows[from_i:to_i]``), so negative indices and ``None`` are accepted.
    A ``'...'`` line is printed before and/or after the selection whenever at
    least one non-blank row was left out on that side.

    Args:
        modelstatistics: object exposing ``input_size``, ``layers_to_str()``
            and a ``str()`` form whose first three lines are the table header
            (e.g. the value returned by ``torchsummary.summary``).
        from_i: slice start into the list of layer rows.
        to_i: slice stop (exclusive) into the list of layer rows.
        show_input_size: when true, print ``modelstatistics.input_size`` first.

    Returns:
        None. Everything is written to stdout.
    """
    if show_input_size:
        print('Input size:', modelstatistics.input_size)

    # header: the first three lines of the full summary text
    header_str_list = str(modelstatistics).split('\n')[:3]
    print('\n'.join(header_str_list))

    # the layers; a trailing newline in layers_to_str() yields a final '' entry,
    # which is kept so that indices mean the same as in a plain split()
    layer_str_list = modelstatistics.layers_to_str().split('\n')

    # Resolve None / negative / out-of-range bounds exactly like slicing does,
    # so the ellipsis decisions below reflect what is really omitted.
    start, stop, _ = slice(from_i, to_i).indices(len(layer_str_list))

    if any(row.strip() for row in layer_str_list[:start]):
        print('...')

    print('\n'.join(layer_str_list[start:stop]))

    # Blank leftovers (the '' after a trailing newline) do not count as omitted rows.
    if any(row.strip() for row in layer_str_list[stop:]):
        print('...')

## Our team's code

In [20]:
# !rm -r /content/csgy6953_DeepLearning_Midterm/

# !git clone -b main "https://github.com/ab7289-tandon-nyu/csgy6953_DeepLearning_Midterm.git"

# !cp -r /content/csgy6953_DeepLearning_Midterm/src/ .

In [21]:
!pip install -U "git+https://github.com/ab7289-tandon-nyu/csgy6953_DeepLearning_Midterm.git"

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/ab7289-tandon-nyu/csgy6953_DeepLearning_Midterm.git
  Cloning https://github.com/ab7289-tandon-nyu/csgy6953_DeepLearning_Midterm.git to /tmp/pip-req-build-ztkm5ng_
  Running command git clone -q https://github.com/ab7289-tandon-nyu/csgy6953_DeepLearning_Midterm.git /tmp/pip-req-build-ztkm5ng_
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone


In [22]:
from src.model import ResidualBlockType, ResNet, StemConfig

## Input

In [23]:
# Random tensors with CIFAR-10 image shapes; one unbatched sample and one batch of 4.
cifar10_one_sample = torch.rand((   3, 32, 32)) #             RGB, 32x32
cifar10_one_batch  = torch.rand((4, 3, 32, 32)) # BATCH_SIZE, RGB, 32x32

# Final Architecture
This section rebuilds our final architecture and reports its parameter count.<br>
Name on *wandb.ai*: `nish_49m_deep6_cyclic_lr_bottleneck_v2.pt`

In [24]:
# 1. the net's stem
#    kernel 5 / stride 1 / padding 2; the recorded summary below shows a 64x32x32 stem output
stem_config = StemConfig(num_channels=64, kernel_size=5, stride=1, padding=2)

# 2. the net's body: one tuple per stage, in the column order shown on the next line
# 2. the net's body                 blocks/layer   channels  dropout
arch = [(ResidualBlockType.BASIC,        3,         64,      0.1),
        (ResidualBlockType.BASIC,        5,        128,      0.1),
        (ResidualBlockType.BOTTLENECK,  26,        256,      0.1),
        (ResidualBlockType.BOTTLENECK,   5,        512,      0.1)]

# 3. assemble the network with a 10-way output
#    NOTE(review): use_bias=False presumably disables conv bias terms — the stem Conv2d reports
#    4,800 params (= 5*5*3*64, no bias) in the recorded summary; confirm in src.model.
final_model = ResNet(arch, stem_config=stem_config, output_size=10, use_bias=False)

In [25]:
# Run one batch through the network before summarising it: per the original note,
# this forward pass is what lets the lazy conv layers pick up their in_channels.
one_output = final_model(cifar10_one_batch) # initialize in_channels in lazyconv2d
# summary() is given the per-sample shape (no batch dimension); the returned object
# is what later cells query for total_params and the layer table.
final_model_modelstatistics = summary(final_model,  cifar10_one_sample.shape, verbose = QUIET_VERBOSE)

Final architecture total parameters

In [26]:
# Parameter count with thousands separators.
print('final_model ', format(final_model_modelstatistics.total_params, ','))

final_model  4,997,194


Visualize final architecture

In [34]:
# Print the layer table starting at its first row (from_i=0);
# to_i=-1 stops before the last element of the newline-split row list.
print_layers_by_index(final_model_modelstatistics, 0, -1)

Input size: [torch.Size([3, 32, 32])]
Layer (type:depth-idx)                        Output Shape              Param #
├─Sequential: 1-1                             [-1, 64, 32, 32]          --
|    └─Conv2d: 2-1                            [-1, 64, 32, 32]          4,800
|    └─BatchNorm2d: 2-2                       [-1, 64, 32, 32]          128
|    └─ReLU: 2-3                              [-1, 64, 32, 32]          --
├─Sequential: 1-2                             [-1, 512, 4, 4]           --
|    └─Sequential: 2-4                        [-1, 64, 32, 32]          --
|    |    └─ResidualBlock: 3-1                [-1, 64, 32, 32]          73,984
|    |    └─ResidualBlock: 3-2                [-1, 64, 32, 32]          73,984
|    |    └─ResidualBlock: 3-3                [-1, 64, 32, 32]          73,984
|    └─Sequential: 2-5                        [-1, 128, 16, 16]         --
|    |    └─ResidualBlock: 3-4                [-1, 128, 16, 16]         229,888
|    |    └─ResidualBlock: 3-5      

# Other Architectures
Our other architectures

# Notable Architectures
Well-known architectures based on published papers

In [28]:
from torchvision.models import resnet18
from torchvision.models import resnet34
from torchvision.models import resnet50
from torchvision.models import resnet101
from torchvision.models import resnet152

In [29]:
# Instantiate each torchvision ResNet variant with its default arguments,
# in the same order as before (18, 34, 50, 101, 152).
(resnet18_model,
 resnet34_model,
 resnet50_model,
 resnet101_model,
 resnet152_model) = (build() for build in (resnet18, resnet34, resnet50, resnet101, resnet152))

In [30]:
# Summarise every reference ResNet on the same per-sample input shape, quietly.
(resnet18_modelstatistics,
 resnet34_modelstatistics,
 resnet50_modelstatistics,
 resnet101_modelstatistics,
 resnet152_modelstatistics) = (
    summary(net, cifar10_one_sample.shape, verbose = QUIET_VERBOSE)
    for net in (resnet18_model, resnet34_model, resnet50_model, resnet101_model, resnet152_model)
)

Total parameters:

In [31]:
# One line per model: padded label, a space, then the parameter count with thousands separators.
print(
    *(
        f'{label} {stats.total_params:,}'
        for label, stats in (
            ('resnet18 ', resnet18_modelstatistics),
            ('resnet34 ', resnet34_modelstatistics),
            ('resnet50 ', resnet50_modelstatistics),
            ('resnet101', resnet101_modelstatistics),
            ('resnet152', resnet152_modelstatistics),
        )
    ),
    sep='\n',
)

resnet18  11,689,512
resnet34  21,797,672
resnet50  25,557,032
resnet101 44,549,160
resnet152 60,192,808


ResNet18 architecture (only the tail of the layer table is shown)

In [32]:
# Show only a window near the end of the table:
# rows from index -20 up to, but excluding, index -2 of the newline-split row list.
print_layers_by_index(resnet18_modelstatistics, -20, -2)

Input size: [torch.Size([3, 32, 32])]
Layer (type:depth-idx)                   Output Shape              Param #
...
|    |    └─ReLU: 3-38                   [-1, 256, 2, 2]           --
├─Sequential: 1-8                        [-1, 512, 1, 1]           --
|    └─BasicBlock: 2-7                   [-1, 512, 1, 1]           --
|    |    └─Conv2d: 3-39                 [-1, 512, 1, 1]           1,179,648
|    |    └─BatchNorm2d: 3-40            [-1, 512, 1, 1]           1,024
|    |    └─ReLU: 3-41                   [-1, 512, 1, 1]           --
|    |    └─Conv2d: 3-42                 [-1, 512, 1, 1]           2,359,296
|    |    └─BatchNorm2d: 3-43            [-1, 512, 1, 1]           1,024
|    |    └─Sequential: 3-44             [-1, 512, 1, 1]           132,096
|    |    └─ReLU: 3-45                   [-1, 512, 1, 1]           --
|    └─BasicBlock: 2-8                   [-1, 512, 1, 1]           --
|    |    └─Conv2d: 3-46                 [-1, 512, 1, 1]           2,359,296
|    |    └