In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from utils import get_network, get_training_dataloader, get_test_dataloader, WarmUpLR, \
    most_recent_folder, most_recent_weights, last_epoch, best_acc_weights
from conf import settings
from models.vgg import vgg16_bn
from torch.profiler import profile, record_function, ProfilerActivity
import copy
from torchinfo import summary

In [4]:
# Instantiate vgg16_bn() and restore its weights from the saved checkpoint, mapped onto the CPU.
vgg16_weights_path = 'checkpoint/vgg16/Thursday_27_July_2023_16h_07m_07s/vgg16-200-regular.pth'

net = vgg16_bn()
net.load_state_dict(
    torch.load(vgg16_weights_path, map_location=torch.device('cpu'))
)

<All keys matched successfully>

In [5]:
# Put the network into evaluation mode; eval() returns the module itself,
# so its repr is displayed as this cell's output.
net.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

In [17]:
# Per-layer output shapes and parameter counts for a single 3x32x32 input.
summary(net, input_size=(1, 3, 32, 32))

Layer (type:depth-idx)                   Output Shape              Param #
VGG                                      [1, 100]                  --
├─Sequential: 1-1                        [1, 512, 1, 1]            --
│    └─Conv2d: 2-1                       [1, 64, 32, 32]           1,792
│    └─BatchNorm2d: 2-2                  [1, 64, 32, 32]           128
│    └─ReLU: 2-3                         [1, 64, 32, 32]           --
│    └─Conv2d: 2-4                       [1, 64, 32, 32]           36,928
│    └─BatchNorm2d: 2-5                  [1, 64, 32, 32]           128
│    └─ReLU: 2-6                         [1, 64, 32, 32]           --
│    └─MaxPool2d: 2-7                    [1, 64, 16, 16]           --
│    └─Conv2d: 2-8                       [1, 128, 16, 16]          73,856
│    └─BatchNorm2d: 2-9                  [1, 128, 16, 16]          256
│    └─ReLU: 2-10                        [1, 128, 16, 16]          --
│    └─Conv2d: 2-11                      [1, 128, 16, 16]          147,

In [6]:
# Build the CIFAR-100 test loader, passing the training-set mean/std constants from settings.
cifar100_test_loader = get_test_dataloader(
    settings.CIFAR100_TRAIN_MEAN,
    settings.CIFAR100_TRAIN_STD,
    batch_size=128,
    num_workers=4,
)

Files already downloaded and verified


In [7]:
# Profile one CPU forward pass of `net` on a random 1x3x32x32 input.
# Autograd is disabled so the trace reflects inference only, consistent with
# test_model(), which also runs its forward passes under torch.no_grad().
with torch.no_grad():
    with profile(activities=[ProfilerActivity.CPU], record_shapes=True, profile_memory=True) as prof:
        with record_function("model_inference"):
            net(torch.rand(1, 3, 32, 32))

STAGE:2023-07-30 16:31:41 122:122 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2023-07-30 16:31:42 122:122 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-07-30 16:31:42 122:122 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


In [8]:
# Show the ten entries of the most recent profiler run with the highest total CPU time.
float_op_stats = prof.key_averages()
print(float_op_stats.table(sort_by="cpu_time_total", row_limit=10))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                  model_inference        13.01%      16.077ms       100.00%     123.578ms     123.578ms           0 b      -2.57 Mb             1  
                     aten::conv2d         1.23%       1.524ms        56.98%      70.412ms       5.416ms       1.05 Mb           0 b            13  
                aten::convolution         0.85%       1.048ms        55.74%      68.888ms       5.299ms       1.05 Mb           0 b            13  
               aten::_convolution         3.10%       3.827ms        54.90%      67.840ms       5.218ms       1.

In [9]:
# Start from a deep copy so that `net` itself is not modified by this cell.
model = copy.deepcopy(net)

# Module types handed to dynamic quantization (weights stored as qint8).
# NOTE(review): dynamic quantization presumably only swaps Linear (and
# recurrent) layers; listing nn.Sequential likely has no effect on the Conv2d
# layers inside it - confirm against the quantize_dynamic documentation.
dynamic_quant_types = {torch.nn.Linear, torch.nn.Sequential}

model_int8 = torch.quantization.quantize_dynamic(
    model,                # the original model
    dynamic_quant_types,  # a set of layers to dynamically quantize
    dtype=torch.qint8,    # the target dtype for quantized weights
)

model_int8.to('cpu')

# Load the previously saved quantized state_dict; this stays the last
# expression so the key-matching result is shown as the cell output.
model_int8.load_state_dict(
    torch.load('model_quantization_int8.pth', map_location=torch.device('cpu'))
)


  device=storage.device,


<All keys matched successfully>

In [10]:
# Profile one CPU forward pass of `model_int8` on a random 1x3x32x32 input.
# Autograd is disabled so the trace reflects inference only, consistent with
# test_model(), which also runs its forward passes under torch.no_grad().
with torch.no_grad():
    with profile(activities=[ProfilerActivity.CPU], record_shapes=True, profile_memory=True) as prof:
        with record_function("model_quan_inference"):
            model_int8(torch.rand(1, 3, 32, 32))

STAGE:2023-07-30 16:31:43 122:122 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2023-07-30 16:31:43 122:122 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-07-30 16:31:43 122:122 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


In [11]:
# Show the ten entries of the most recent profiler run with the highest total CPU time.
quan_op_stats = prof.key_averages()
print(quan_op_stats.table(sort_by="cpu_time_total", row_limit=10))

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
             model_quan_inference        31.50%      14.191ms       100.00%      45.054ms      45.054ms           0 b      -2.51 Mb             1  
                     aten::conv2d         0.14%      64.000us        40.25%      18.136ms       1.395ms       1.05 Mb           0 b            13  
                aten::convolution         0.66%     298.000us        40.11%      18.072ms       1.390ms       1.05 Mb           0 b            13  
               aten::_convolution         0.36%     160.000us        39.45%      17.774ms       1.367ms       1.

In [12]:
def test_model(model, name='model_inference'):
    model.eval()

    correct_1 = 0.0
    correct_5 = 0.0
    total = 0
    acc = 0.0
    with torch.no_grad():
        with profile(activities=[ProfilerActivity.CPU], record_shapes=True, profile_memory=True) as prof:
            with record_function(name):
                for n_iter, (image, label) in enumerate(cifar100_test_loader):
                    _ = model(image)
                 

    # if args.gpu:
    #     print('GPU INFO.....')
    #     print(torch.cuda.memory_summary(), end='')

    print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))


In [13]:
# Profile `net` over the full test loader under the default region label.
test_model(net, name='model_inference')

STAGE:2023-07-30 16:31:43 122:122 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2023-07-30 16:32:49 122:122 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-07-30 16:32:49 122:122 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                        model_inference         1.73%        1.136s       100.00%       65.756s       65.756s     198.38 Kb     -22.19 Gb             1  
                                           aten::conv2d         0.02%      13.443ms        78.29%       51.480s      50.126ms      10.30 Gb           0 b          1027  
                                      aten::convolution         0.02%      16.421ms        78.27%       51.466s      50.113ms      10.30 Gb           

In [14]:
# Profile the dynamically quantized model over the same loader; "ok" is the profiler region label.
test_model(model_int8, name="ok")

STAGE:2023-07-30 16:32:52 122:122 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2023-07-30 16:33:52 122:122 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-07-30 16:33:52 122:122 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                     ok         1.61%     965.402ms       100.00%       59.890s       59.890s     198.38 Kb     -22.19 Gb             1  
                                           aten::conv2d         0.02%      11.751ms        82.06%       49.143s      47.851ms      10.30 Gb           0 b          1027  
                                      aten::convolution         0.03%      18.591ms        82.04%       49.131s      47.840ms      10.30 Gb           

In [18]:
!pip install matplotlib



In [19]:
# Run the external test.py script in a subprocess for network "v5" with the given weights file.
# NOTE(review): -b=128 is presumably the batch size - confirm against test.py's argument parser.
!python3 test.py -net=v5 -weights=v5-200-regular.pth -b=128

Files already downloaded and verified
MyCompressNet5(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(64, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (mp1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv7): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn7): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv8): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), padding=same)
  (bn8): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv9): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=same)
  (bn9): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (mp2): MaxPool2d(kernel_

In [21]:
# Run the external test.py script in a subprocess for network "v1" with the given weights file.
# NOTE(review): the weights filename here ("v1-200regular.pth") does not follow the
# "<net>-200-regular.pth" pattern used for v5 - verify the file name on disk.
!python3 test.py -net=v1 -weights=v1-200regular.pth -b=128

Files already downloaded and verified
MyCompressNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn5): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=4096, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_featu