In [None]:
from resnet import resnet18, resnet34, resnet50, resnet101, resnet152
from resnet import resnet18_bottleneck, resnet34_bottleneck, resnet50_bottleneck, resnet101_bottleneck, resnet152_bottleneck
import numpy as np
import torch
from torch import nn

# Device string passed to `.to(device)` by the GPU profiling cells further down.
device = "cuda"

In [None]:
from torch.profiler import profile, record_function, ProfilerActivity

def profile_memory(model, inputs, sort_by_self=True, row_limit=15):
    """Profile the CPU memory of one forward pass and print a per-operator table.

    ``model(inputs)`` is called once outside the profiler as a warm-up, then
    once more inside a ``torch.profiler.profile`` context (CPU activity only,
    with memory and input-shape recording enabled) under the
    ``"model_inference"`` label. This function does not change the autograd
    mode or the model's train/eval mode; both are whatever the caller set.

    Args:
        model: Callable invoked as ``model(inputs)``.
        inputs: Object passed unchanged to ``model``.
        sort_by_self: When true, rows are ordered by ``self_cpu_memory_usage``;
            otherwise by ``cpu_memory_usage``.
        row_limit: Maximum number of rows printed. Defaults to 15, the value
            that used to be hard-coded.

    Returns:
        None. The aggregated table is written to stdout.
    """
    sort_key = "self_cpu_memory_usage" if sort_by_self else "cpu_memory_usage"

    # Warm-up call: keeps one-time setup work out of the profiled pass.
    model(inputs)

    with profile(activities=[ProfilerActivity.CPU],
                 profile_memory=True, record_shapes=True) as prof:
        with record_function("model_inference"):
            model(inputs)

    print(prof.key_averages().table(sort_by=sort_key, row_limit=row_limit))


def profile_memory_gpu(model, inputs, sort_by_self=True, row_limit=15):
    """Profile the CUDA memory of one forward pass and print a per-operator table.

    ``model(inputs)`` is called once outside the profiler as a warm-up, then
    once more inside a ``torch.profiler.profile`` context (CPU and CUDA
    activities, with memory and input-shape recording enabled) under the
    ``"model_inference"`` label. This function does not move ``model`` or
    ``inputs`` to any device and does not change the autograd or train/eval
    mode; the caller is responsible for all of that.

    Args:
        model: Callable invoked as ``model(inputs)``.
        inputs: Object passed unchanged to ``model``.
        sort_by_self: When true, rows are ordered by ``self_cuda_memory_usage``;
            otherwise by ``cuda_memory_usage``.
        row_limit: Maximum number of rows printed. Defaults to 15, the value
            that used to be hard-coded.

    Returns:
        None. The aggregated table is written to stdout.
    """
    sort_key = "self_cuda_memory_usage" if sort_by_self else "cuda_memory_usage"

    # Warm-up call: keeps one-time setup work out of the profiled pass.
    model(inputs)

    # CUDA kernels are launched asynchronously, so wait for the warm-up to
    # finish before the profiler starts; otherwise its tail could be recorded
    # as part of the profiled region. Skipped when no CUDA device is present.
    if torch.cuda.is_available():
        torch.cuda.synchronize()

    with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
                 profile_memory=True, record_shapes=True) as prof:
        with record_function("model_inference"):
            model(inputs)

    print(prof.key_averages().table(sort_by=sort_key, row_limit=row_limit))

# Memory Footprint

We investigate how the required memory changes as we increase the size of the network and how much efficiency we gain, if any, from using bottleneck layers. Ideally, bottleneck layers will yield large gains in memory footprint, reducing the required memory significantly. Additionally, we profile the larger ResNets (50, 101 and 152) in what we refer to as "Large Mode", where the number of internal channels has been quadrupled relative to the plain implementations. These large-mode ResNets also use bottleneck layers.

### ResNet-18 vs Bottleneck ResNet-18

In [None]:
# Plain ResNet-18 on the CPU, fed one random batch of shape (32, 3, 224, 224);
# the profiler table is sorted by self CPU memory.
# NOTE(review): the stored output for this cell looks stale. Its max_pool2d row
# (588.00 Mb) is 8x the ResNet-50 cell's 73.50 Mb for the same input shape, so it
# was presumably produced with a larger batch. Re-run before comparing.
model = resnet18()
inputs = torch.rand((32, 3, 224, 224))
profile_memory(model, inputs, sort_by_self=True)

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                      aten::empty         0.01%       1.613ms         0.01%       1.613ms       8.065us       4.74 Gb       4.74 Gb           200  
                  aten::clamp_min         3.22%     833.532ms         3.22%     833.532ms      52.096ms       1.44 Gb       1.44 Gb            16  
    aten::max_pool2d_with_indices         8.44%        2.183s         8.44%        2.183s        2.183s     588.00 Mb     588.00 Mb             1  
                      aten::addmm         0.02%       4.109ms         0.02%       4.262ms       4.262ms    1000.

In [None]:
# Plain ResNet-18 on `device`, fed one random batch of shape (32, 3, 224, 224)
# that is created on the CPU and then moved to `device`.
# NOTE(review): the stored output reports 2.37 Gb for aten::empty, about 5x the
# ResNet-50 GPU cell's 457.38 Mb for the same input shape. It was possibly
# produced with a larger batch; re-run to confirm.
model = resnet18()
model = model.to(device)
inputs = torch.rand((32, 3, 224, 224)).to(device)
profile_memory_gpu(model, inputs, sort_by_self=True)

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                            aten::empty         0.19%     714.000us         0.19%     714.000us       7.069us       0.000us         0.00%       0.000us       0.000us           0 b           0 b       2.37 Gb       2.37 G

In [None]:
# Bottleneck ResNet-18 on the CPU, fed one random batch of shape
# (32, 3, 224, 224); the profiler table is sorted by self CPU memory.
# NOTE(review): the stored output for this cell looks stale. Its max_pool2d row
# (1.15 Gb) is about 16x the 73.50 Mb seen in the ResNet-50 cells for the same
# input shape, so it was presumably produced with a larger batch. Re-run before
# comparing.
model = resnet18_bottleneck()
inputs = torch.rand((32, 3, 224, 224))
profile_memory(model, inputs, sort_by_self=True)

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                      aten::empty         0.01%       2.839ms         0.01%       2.839ms      10.139us       8.04 Gb       8.04 Gb           280  
                  aten::clamp_min         4.81%        1.166s         4.81%        1.166s      48.565ms       2.15 Gb       2.15 Gb            24  
    aten::max_pool2d_with_indices        21.67%        5.253s        21.67%        5.253s        5.253s       1.15 Gb       1.15 Gb             1  
                      aten::addmm         0.03%       7.888ms         0.03%       8.195ms       8.195ms       1.

In [None]:
# Bottleneck ResNet-18 on `device`, fed one random batch of shape
# (32, 3, 224, 224) that is created on the CPU and then moved to `device`.
# NOTE(review): the stored output reports 4.02 Gb for aten::empty, roughly 11x
# the bottleneck ResNet-50 GPU cell's 375.23 Mb for the same input shape. It was
# possibly produced with a larger batch; re-run to confirm.
model = resnet18_bottleneck()
model = model.to(device)
inputs = torch.rand((32, 3, 224, 224)).to(device)
profile_memory_gpu(model, inputs, sort_by_self=True)

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                            aten::empty         0.13%     650.000us         0.13%     650.000us       4.610us       0.000us         0.00%       0.000us       0.000us           0 b           0 b       4.02 Gb       4.02 G

### ResNet-50 vs Bottleneck ResNet-50 vs Large Bottleneck ResNet-50

In [None]:
# Plain ResNet-50 on the CPU, fed one random batch of shape (32, 3, 224, 224);
# the profiler table is sorted by self CPU memory.
model = resnet50()
inputs = torch.rand((32, 3, 224, 224))
profile_memory(model, inputs, sort_by_self=True)

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                      aten::empty         0.12%       6.521ms         0.12%       6.521ms      18.114us     912.80 Mb     912.80 Mb           360  
                  aten::clamp_min         1.46%      79.833ms         1.46%      79.833ms       2.495ms     336.88 Mb     336.88 Mb            32  
    aten::max_pool2d_with_indices         7.20%     392.840ms         7.20%     392.840ms     392.840ms      73.50 Mb      73.50 Mb             1  
                      aten::addmm         0.02%     858.000us         0.02%     895.000us     895.000us     125.

In [None]:
# Plain ResNet-50 on `device`, fed one random batch of shape (32, 3, 224, 224)
# that is created on the CPU and then moved to `device`.
model = resnet50()
model = model.to(device)
inputs = torch.rand((32, 3, 224, 224)).to(device)
profile_memory_gpu(model, inputs, sort_by_self=True)

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                            aten::empty         0.80%     805.000us         0.80%     805.000us       4.448us       0.000us         0.00%       0.000us       0.000us           0 b           0 b     457.38 Mb     457.38 M

In [None]:
# Bottleneck ResNet-50 on the CPU, fed one random batch of shape
# (32, 3, 224, 224); the profiler table is sorted by self CPU memory.
model = resnet50_bottleneck()
inputs = torch.rand((32, 3, 224, 224))
profile_memory(model, inputs, sort_by_self=True)

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                      aten::empty         0.40%       5.863ms         0.40%       5.863ms      11.275us     742.80 Mb     742.80 Mb           520  
                  aten::clamp_min         3.46%      51.206ms         3.46%      51.206ms       1.067ms     252.66 Mb     252.66 Mb            48  
    aten::max_pool2d_with_indices        20.10%     297.427ms        20.10%     297.427ms     297.427ms      73.50 Mb      73.50 Mb             1  
                 aten::empty_like         0.01%     202.000us         0.12%       1.727ms      33.212us     372.

In [None]:
# Bottleneck ResNet-50 on `device`, fed one random batch of shape
# (32, 3, 224, 224) that is created on the CPU and then moved to `device`.
model = resnet50_bottleneck()
model = model.to(device)
inputs = torch.rand((32, 3, 224, 224)).to(device)
profile_memory_gpu(model, inputs, sort_by_self=True)

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                            aten::empty         4.06%       1.269ms         4.06%       1.269ms       4.862us       0.000us         0.00%       0.000us       0.000us           0 b           0 b     375.23 Mb     375.23 M

In [None]:
# Bottleneck ResNet-50 built with large_mode=True, on the CPU, fed one random
# batch of shape (32, 3, 224, 224); the table is sorted by self CPU memory.
model = resnet50_bottleneck(large_mode=True)
inputs = torch.rand((32, 3, 224, 224))
profile_memory(model, inputs, sort_by_self=True)

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                      aten::empty         0.31%       9.175ms         0.31%       9.175ms      17.311us     804.05 Mb     804.05 Mb           530  
                  aten::clamp_min         1.85%      54.477ms         1.85%      54.477ms       1.135ms     256.88 Mb     256.88 Mb            48  
    aten::max_pool2d_with_indices        13.40%     394.519ms        13.40%     394.519ms     394.519ms      73.50 Mb      73.50 Mb             1  
                       aten::mean         0.00%      40.000us         0.04%       1.083ms       1.083ms     256.

In [None]:
# Bottleneck ResNet-50 built with large_mode=True, on `device`, fed one random
# batch of shape (32, 3, 224, 224) created on the CPU and moved to `device`.
model = resnet50_bottleneck(large_mode=True)
model = model.to(device)
inputs = torch.rand((32, 3, 224, 224)).to(device)
profile_memory_gpu(model, inputs, sort_by_self=True)

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                            aten::empty         2.12%       1.040ms         2.12%       1.040ms       3.910us       0.000us         0.00%       0.000us       0.000us           0 b           0 b     405.86 Mb     405.86 M

### ResNet-152 vs Bottleneck ResNet-152 vs Large Bottleneck ResNet-152

In [None]:
# Plain ResNet-152 on the CPU, fed one random batch of shape (32, 3, 224, 224);
# the profiler table is sorted by self CPU memory.
model = resnet152()
inputs = torch.rand((32, 3, 224, 224))
profile_memory(model, inputs, sort_by_self=True)

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                      aten::empty         0.09%      14.292ms         0.09%      14.292ms      13.742us       1.80 Gb       1.80 Gb          1040  
                  aten::clamp_min         1.52%     238.191ms         1.52%     238.191ms       2.382ms     802.38 Mb     802.38 Mb           100  
    aten::max_pool2d_with_indices         1.96%     308.286ms         1.96%     308.286ms     308.286ms      73.50 Mb      73.50 Mb             1  
                      aten::addmm         0.01%       1.047ms         0.01%       1.095ms       1.095ms     125.

In [None]:
# Plain ResNet-152 on `device`, fed one random batch of shape (32, 3, 224, 224)
# that is created on the CPU and then moved to `device`.
model = resnet152()
model = model.to(device)
inputs = torch.rand((32, 3, 224, 224)).to(device)
profile_memory_gpu(model, inputs, sort_by_self=True)

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                            aten::empty         1.61%       4.289ms         1.61%       4.289ms       8.232us       0.000us         0.00%       0.000us       0.000us           0 b           0 b     924.94 Mb     924.94 M

In [None]:
# Bottleneck ResNet-152 on the CPU, fed one random batch of shape
# (32, 3, 224, 224); the profiler table is sorted by self CPU memory.
model = resnet152_bottleneck()
inputs = torch.rand((32, 3, 224, 224))
profile_memory(model, inputs, sort_by_self=True)

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                      aten::empty         0.28%       9.797ms         0.28%       9.797ms       6.362us       1.41 Gb       1.41 Gb          1540  
                  aten::clamp_min         3.60%     127.026ms         3.60%     127.026ms     846.840us     601.78 Mb     601.78 Mb           150  
    aten::max_pool2d_with_indices         9.57%     337.225ms         9.57%     337.225ms     337.225ms      73.50 Mb      73.50 Mb             1  
                      aten::addmm         0.02%     790.000us         0.02%     837.000us     837.000us     125.

In [None]:
# Bottleneck ResNet-152 on `device`, fed one random batch of shape
# (32, 3, 224, 224) that is created on the CPU and then moved to `device`.
model = resnet152_bottleneck()
model = model.to(device)
inputs = torch.rand((32, 3, 224, 224)).to(device)
profile_memory_gpu(model, inputs, sort_by_self=True)

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                            aten::empty         4.83%       3.751ms         4.83%       3.751ms       4.865us       0.000us         0.00%       0.000us       0.000us           0 b           0 b     724.01 Mb     724.01 M

In [None]:
# Bottleneck ResNet-152 built with large_mode=True, on the CPU, fed one random
# batch of shape (32, 3, 224, 224); the table is sorted by self CPU memory.
model = resnet152_bottleneck(large_mode=True)
inputs = torch.rand((32, 3, 224, 224))
profile_memory(model, inputs, sort_by_self=True)

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                      aten::empty         0.23%      14.088ms         0.23%      14.088ms       9.089us       1.47 Gb       1.47 Gb          1550  
                  aten::clamp_min         1.99%     121.850ms         1.99%     121.850ms     812.333us     606.00 Mb     606.00 Mb           150  
    aten::max_pool2d_with_indices         4.86%     297.454ms         4.86%     297.454ms     297.454ms      73.50 Mb      73.50 Mb             1  
                       aten::mean         0.00%      44.000us         0.03%       1.707ms       1.707ms     256.

In [None]:
# Bottleneck ResNet-152 built with large_mode=True, on `device`, fed one random
# batch of shape (32, 3, 224, 224) created on the CPU and moved to `device`.
model = resnet152_bottleneck(large_mode=True)
model = model.to(device)
inputs = torch.rand((32, 3, 224, 224)).to(device)
profile_memory_gpu(model, inputs, sort_by_self=True)

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                aten::cudnn_convolution         4.64%       7.916ms        10.50%      17.915ms     115.581us      71.610ms        67.32%      71.610ms     462.000us           0 b           0 b     756.84 Mb     756.84 M