In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('../')

In [2]:
from networks.inception import InceptionResnet, SupIncepResnet
from networks.simple_cnn import BaselineCNNClassifier
from networks.resnet_big import SupCEResNet, SupConResNet, LinearClassifier
import torch
from torchsummary import summary
from thop import profile
import numpy as np
import time

In [3]:
def measure_time_gpu(model, device, rep, warmup=10, input_shape=(1, 1, 29, 29)):
    """Measure the forward-pass latency of ``model`` using CUDA events.

    The model is moved to ``device`` and fed one random tensor of shape
    ``input_shape`` repeatedly. Each timed pass is bracketed by a pair of
    CUDA events and followed by a device synchronisation, so the reported
    time covers the kernels actually executed rather than just their launch.

    All passes (warm-up and timed) run in eval mode under ``torch.no_grad()``:
    this times inference rather than a training-mode forward pass, avoids
    building autograd graphs, and stops layers such as BatchNorm from updating
    their running statistics as a side effect of benchmarking. The model's
    original train/eval mode is restored before returning.

    Args:
        model: ``torch.nn.Module`` to benchmark.
        device: Target device, e.g. ``'cuda'``.
        rep: Number of timed forward passes; must be >= 1.
        warmup: Number of untimed passes executed first so that one-off
            start-up costs do not end up in the measurements.
        input_shape: Shape of the random input tensor. The default is a single
            1-channel 29x29 sample.

    Returns:
        Tuple ``(mean, std)`` of the per-pass latency in milliseconds (the
        unit reported by ``torch.cuda.Event.elapsed_time``).

    Raises:
        ValueError: If ``rep`` is smaller than 1.
    """
    if rep < 1:
        raise ValueError(f"rep must be a positive integer, got {rep!r}")

    model = model.to(device=device)
    dummy_input = torch.randn(*input_shape, dtype=torch.float, device=device)

    # INIT LOGGERS
    starter = torch.cuda.Event(enable_timing=True)
    ender = torch.cuda.Event(enable_timing=True)
    timings = np.zeros(rep)

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            # GPU WARM-UP
            for _ in range(warmup):
                model(dummy_input)

            # MEASURE PERFORMANCE
            for i in range(rep):
                starter.record()
                model(dummy_input)
                ender.record()
                # WAIT FOR GPU SYNC: elapsed_time() is only valid once both
                # events have completed on the device.
                torch.cuda.synchronize()
                timings[i] = starter.elapsed_time(ender)
    finally:
        # Leave the caller's model in the mode it arrived in.
        model.train(was_training)

    return timings.mean(), timings.std()


In [4]:
def measure_time_cpu(model, device, rep = 10, warmup=10, input_shape=(1, 1, 29, 29)):
    """Measure the wall-clock forward-pass latency of ``model``.

    The model is moved to ``device`` and fed one random tensor of shape
    ``input_shape`` repeatedly. All passes run in eval mode under
    ``torch.no_grad()`` so that inference (not a training-mode forward pass
    with autograd bookkeeping) is timed and BatchNorm running statistics are
    not modified by the benchmark. The model's original train/eval mode is
    restored before returning.

    Timing uses ``time.perf_counter()``, a monotonic high-resolution clock.
    No device synchronisation is performed, so this helper is only meaningful
    for synchronous (CPU) execution.

    Args:
        model: ``torch.nn.Module`` to benchmark.
        device: Target device, normally ``'cpu'``.
        rep: Number of timed forward passes; must be >= 1.
        warmup: Number of untimed passes executed first so that one-off
            start-up costs do not skew the mean and standard deviation.
        input_shape: Shape of the random input tensor. The default is a single
            1-channel 29x29 sample.

    Returns:
        Tuple ``(mean, std)`` of the per-pass latency in **seconds**. Note that
        ``measure_time_gpu`` reports milliseconds; multiply by 1000 before
        comparing the two.

    Raises:
        ValueError: If ``rep`` is smaller than 1.
    """
    if rep < 1:
        raise ValueError(f"rep must be a positive integer, got {rep!r}")

    model = model.to(device=device)
    x = torch.rand(input_shape, device=device)
    timings = np.zeros(rep)

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            # Warm-up passes: not timed.
            for _ in range(warmup):
                model(x)

            for i in range(rep):
                start_time = time.perf_counter()
                model(x)
                timings[i] = time.perf_counter() - start_time
    finally:
        # Leave the caller's model in the mode it arrived in.
        model.train(was_training)

    return timings.mean(), timings.std()

In [5]:
# Baseline model to benchmark: SupCEResNet (networks.resnet_big) built with
# name='resnet18' and num_classes=5.
baseline_model = SupCEResNet(name='resnet18', num_classes=5)

In [6]:
# Time 100 single-sample forward passes of the baseline on the GPU.
# Result is (mean, std) in milliseconds.
measure_time_gpu(baseline_model, 'cuda', rep=100)

(0.5157068842649459, 0.8879897666619131)

In [7]:
# Same benchmark on the CPU. Result is (mean, std) in seconds, not
# milliseconds as in the GPU helper.
measure_time_cpu(baseline_model, device='cpu', rep=100)

In [None]:
# Second model to benchmark: SupIncepResnet (networks.inception) built with
# num_classes=5.
incep = SupIncepResnet(num_classes=5)

In [None]:
# Time 100 single-sample forward passes of the Inception-ResNet model on the
# GPU. Result is (mean, std) in milliseconds.
measure_time_gpu(incep, 'cuda', rep=100)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


(1.2263798499107361, 1.4931276409882448)

In [9]:
# Same benchmark on the CPU. Result is (mean, std) in seconds, not
# milliseconds as in the GPU helper.
measure_time_cpu(incep, device='cpu', rep=100)

In [None]:
# Count multiply-accumulate operations and parameters of the baseline with
# thop, using a single random 1x29x29 input on the CPU.
x = torch.rand((1, 1, 29, 29), device='cpu')
# The input lives on the CPU, so the model is moved there as well.
baseline_model = baseline_model.to(device='cpu')
macs, params = profile(baseline_model, inputs=(x, ))
# Decimal scaling: 1000**3 -> giga, 1000**2 -> mega.
print('MACs (G): ', macs/1000**3)
print('Params (M): ', params/1000**2)

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_adap_avgpool() for <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
MACs (G):  0.032560592
Params (M):  0.700533


In [None]:
# Count multiply-accumulate operations and parameters of the Inception-ResNet
# model with thop, using a single random 1x29x29 input on the CPU.
x = torch.rand((1, 1, 29, 29), device='cpu')
# The input lives on the CPU, so the model is moved there as well.
incep = incep.to(device='cpu')
macs, params = profile(incep, inputs=(x, ))
# Decimal scaling: 1000**3 -> giga, 1000**2 -> mega.
print('MACs (G): ', macs/1000**3)
print('Params (M): ', params/1000**2)

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register count_adap_avgpool() for <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
MACs (G):  0.097190176
Params (M):  1.694181


To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448224956/work/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


In [None]:
# Per-layer output shapes and parameter counts for a 1x29x29 input
# (torchsummary). .cuda() moves the model onto the GPU first; in notebook
# order, the thop profiling cell left it on the CPU.
summary(baseline_model.cuda(), (1, 29, 29))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 29, 29]             144
       BatchNorm2d-2           [-1, 16, 29, 29]              32
            Conv2d-3           [-1, 16, 29, 29]           2,304
       BatchNorm2d-4           [-1, 16, 29, 29]              32
            Conv2d-5           [-1, 16, 29, 29]           2,304
       BatchNorm2d-6           [-1, 16, 29, 29]              32
        BasicBlock-7           [-1, 16, 29, 29]               0
            Conv2d-8           [-1, 16, 29, 29]           2,304
       BatchNorm2d-9           [-1, 16, 29, 29]              32
           Conv2d-10           [-1, 16, 29, 29]           2,304
      BatchNorm2d-11           [-1, 16, 29, 29]              32
       BasicBlock-12           [-1, 16, 29, 29]               0
           Conv2d-13           [-1, 32, 15, 15]           4,608
      BatchNorm2d-14           [-1, 32,

In [None]:
# Per-layer output shapes and parameter counts for a 1x29x29 input
# (torchsummary). .cuda() moves the model onto the GPU first; in notebook
# order, the thop profiling cell left it on the CPU.
summary(incep.cuda(), (1, 29, 29))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 29, 29]             320
            Conv2d-2           [-1, 32, 27, 27]           9,248
         MaxPool2d-3           [-1, 32, 13, 13]               0
            Conv2d-4           [-1, 64, 13, 13]           2,112
            Conv2d-5          [-1, 128, 13, 13]          73,856
            Conv2d-6          [-1, 128, 13, 13]         147,584
              Stem-7          [-1, 128, 13, 13]               0
            Conv2d-8           [-1, 32, 13, 13]           4,128
            Conv2d-9           [-1, 32, 13, 13]           4,128
           Conv2d-10           [-1, 32, 13, 13]           9,248
           Conv2d-11           [-1, 32, 13, 13]           4,128
           Conv2d-12           [-1, 32, 13, 13]           9,248
           Conv2d-13           [-1, 32, 13, 13]           9,248
           Conv2d-14          [-1, 128,