<a href="https://colab.research.google.com/github/john-jehiel/cards-image-classification/blob/main/Custom_Function_for_MAC_calculation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [66]:
!pip install thop



In [67]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from thop import profile

### Utility functions

In [68]:
def l_prod(input_list):
    """Return the product of all values in ``input_list``.

    An empty iterable yields ``1``, the multiplicative identity.
    """
    product = 1
    for factor in input_list:
        product *= factor
    return product

def l_sum(input_list):
    """Return the sum of all values in ``input_list`` (``0`` when it is empty)."""
    return sum(input_list)

def calculate_parameters(param_list):
    """Return the total element count of the tensors in ``param_list``.

    Each tensor contributes ``nelement()`` wrapped in a one-element
    ``torch.DoubleTensor``; the contributions are added together. When
    ``param_list`` is empty the plain integer ``0`` is returned.
    """
    return sum(torch.DoubleTensor([param.nelement()]) for param in param_list)

def calculate_zero_ops():
    """Return a one-element double tensor holding ``0`` (a layer with no counted ops)."""
    return torch.DoubleTensor([0])

def calculate_conv2d_flops(input_size: list, output_size: list, kernel_size: list, groups: int, bias: bool = False):
    """Return the operation count of a convolution layer.

    The count is ``prod(output_size) * (in_channels // groups) *
    prod(kernel_size[2:])``.

    Args:
        input_size: Shape of the layer input; index 1 is read as the input
            channel count.
        output_size: Shape of the layer output.
        kernel_size: Shape of the weight tensor; entries from index 2 onward
            are multiplied together.
        groups: Number of convolution groups.
        bias: Accepted for interface compatibility; not used in the result.
    """
    channels_per_group = input_size[1] // groups
    kernel_elements = l_prod(kernel_size[2:])
    output_elements = l_prod(output_size)
    return output_elements * channels_per_group * kernel_elements

def calculate_conv(bias, kernel_size, output_size, in_channel, group):
    """Return ``output_size * (in_channel / group * kernel_size + bias)`` as a double tensor."""
    ops_per_output = in_channel / group * kernel_size + bias
    return torch.DoubleTensor([output_size * ops_per_output])

def calculate_norm(input_size):
    """Return twice ``input_size`` as a one-element double tensor."""
    doubled = 2 * input_size
    return torch.DoubleTensor([doubled])

def calculate_relu_flops(input_size):
    """Return ``0`` for any ``input_size``; the argument is not used."""
    relu_ops = 0
    return relu_ops

def calculate_relu(input_size: torch.Tensor):
    """Return ``int(input_size)`` wrapped in a one-element double tensor."""
    element_count = int(input_size)
    return torch.DoubleTensor([element_count])

def calculate_softmax(batch_size, nfeatures):
    """Return the softmax operation count as a one-element double tensor.

    Per row the count is ``nfeatures`` exponentials, ``nfeatures - 1``
    additions and ``nfeatures`` divisions; the row total is multiplied by
    ``batch_size``.
    """
    ops_per_row = nfeatures + (nfeatures - 1) + nfeatures
    return torch.DoubleTensor([int(batch_size * ops_per_row)])

def calculate_avgpool(input_size):
    """Return ``int(input_size)`` wrapped in a one-element double tensor."""
    element_count = int(input_size)
    return torch.DoubleTensor([element_count])

def calculate_adaptive_avg(kernel_size, output_size):
    """Return ``int((kernel_size + 1) * output_size)`` as a one-element double tensor.

    The ``+ 1`` accounts for one division per output element on top of the
    ``kernel_size`` additions.
    """
    ops_per_output = kernel_size + 1
    return torch.DoubleTensor([int(ops_per_output * output_size)])

def calculate_linear(in_feature, num_elements):
    """Return ``int(in_feature * num_elements)`` as a one-element double tensor."""
    multiplications = int(in_feature * num_elements)
    return torch.DoubleTensor([multiplications])

def counter_matmul(input_size, output_size):
    """Return ``prod(input_size)`` multiplied by the last entry of ``output_size``."""
    input_dims = np.array(input_size)
    last_output_dim = np.array(output_size)[-1]
    return np.prod(input_dims) * last_output_dim

### Layer Calculation functions

In [69]:
def count_parameters(m, x, y):
    """Forward hook: store the module's parameter count in ``m.total_params[0]``.

    Args:
        m: Module the hook is attached to; must expose a ``total_params``
            tensor and ``parameters()``.
        x: Forward inputs (unused).
        y: Forward output (unused).
    """
    # The count comes from the shared helper; the previous hand-written loop
    # computed the same total and then discarded it.
    m.total_params[0] = calculate_parameters(m.parameters())

def zero_ops(m, x, y):
    """Forward hook for layers that contribute no operations to ``m.total_ops``."""
    no_ops = calculate_zero_ops()
    m.total_ops += no_ops

def count_convNd(m: nn.modules.conv._ConvNd, x, y: torch.Tensor):
    """Forward hook: add a convolution layer's operation count to ``m.total_ops``.

    Args:
        m: Convolution module the hook is attached to.
        x: Tuple of forward inputs; only the first element is used.
        y: Output tensor of the layer.
    """
    x = x[0]

    m.total_ops += calculate_conv2d_flops(
        input_size=list(x.shape),
        output_size=list(y.shape),
        kernel_size=list(m.weight.shape),
        groups=m.groups,
        # The helper declares ``bias`` as a bool, so pass a flag rather than
        # the bias Parameter itself.
        bias=m.bias is not None,
    )

def count_normalization(m: nn.modules.batchnorm._BatchNorm, x, y):
    """Forward hook: add a normalization layer's operation count to ``m.total_ops``.

    The base count is ``calculate_norm`` of the input element count; it is
    doubled when the module has a truthy ``affine`` or ``elementwise_affine``
    attribute.
    """
    input_tensor = x[0]
    norm_ops = calculate_norm(input_tensor.numel())
    has_affine = getattr(m, 'affine', False) or getattr(m, 'elementwise_affine', False)
    if has_affine:
        norm_ops *= 2
    m.total_ops += norm_ops

def count_prelu(m, x, y):
    """Forward hook: add one operation per input element when ``m`` is in eval mode.

    Nothing is added while ``m.training`` is true.
    """
    element_count = x[0].numel()
    if m.training:
        return
    m.total_ops += calculate_relu(element_count)

def count_relu(m, x, y):
    """Forward hook: add the ReLU-style operation count to ``m.total_ops``.

    Args:
        m: Module the hook is attached to.
        x: Tuple of forward inputs; only the first element's shape is used.
        y: Forward output (unused).
    """
    x = x[0]
    # The unused ``nelements`` local was dropped; the helper only receives the shape.
    m.total_ops += calculate_relu_flops(list(x.shape))

def count_softmax(m, x, y):
    """Forward hook: add the softmax operation count to ``m.total_ops``.

    The size of the input along ``m.dim`` is the feature count; every other
    element of the input is treated as part of the batch.
    """
    input_tensor = x[0]
    feature_count = input_tensor.shape[m.dim]
    row_count = input_tensor.numel() // feature_count
    m.total_ops += calculate_softmax(row_count, feature_count)

def count_avgpool(m, x, y):
    """Forward hook: add one operation per output element to ``m.total_ops``."""
    m.total_ops += calculate_avgpool(y.numel())

def count_adap_avgpool(m, x, y):
    """Forward hook: add an adaptive average-pool operation count to ``m.total_ops``.

    The effective kernel is the element-wise ratio of the input and output
    shapes from dimension 2 onward; the product of that ratio is the number of
    additions per output element.
    """
    input_spatial = torch.DoubleTensor(list(x[0].shape[2:]))
    output_spatial = torch.DoubleTensor(list(y.shape[2:]))
    additions_per_output = torch.prod(torch.div(input_spatial, output_spatial))
    m.total_ops += calculate_adaptive_avg(additions_per_output, y.numel())

def count_linear(m, x, y):
    """Forward hook: add ``m.in_features`` operations per output element to ``m.total_ops``."""
    output_elements = y.numel()
    m.total_ops += calculate_linear(m.in_features, output_elements)

### Custom function

In [70]:
# Maps each supported ``nn.Module`` type to the forward hook that accumulates
# its operation count into ``m.total_ops``. ``custom_profile`` looks modules up
# by their exact type, so subclasses of these layers are not matched.
register_hooks = {
    # Padding: nothing counted.
    nn.ZeroPad2d: zero_ops,
    # Convolutions, regular and transposed, share one counter.
    nn.Conv1d: count_convNd,
    nn.Conv2d: count_convNd,
    nn.Conv3d: count_convNd,
    nn.ConvTranspose1d: count_convNd,
    nn.ConvTranspose2d: count_convNd,
    nn.ConvTranspose3d: count_convNd,
    # Normalization layers.
    nn.BatchNorm1d: count_normalization,
    nn.BatchNorm2d: count_normalization,
    nn.BatchNorm3d: count_normalization,
    nn.LayerNorm: count_normalization,
    nn.InstanceNorm1d: count_normalization,
    nn.InstanceNorm2d: count_normalization,
    nn.InstanceNorm3d: count_normalization,
    # Activations.
    nn.PReLU: count_prelu,
    nn.Softmax: count_softmax,
    nn.ReLU: zero_ops,
    nn.ReLU6: zero_ops,
    nn.LeakyReLU: count_relu,
    # Max pooling: nothing counted.
    nn.MaxPool1d: zero_ops,
    nn.MaxPool2d: zero_ops,
    nn.MaxPool3d: zero_ops,
    nn.AdaptiveMaxPool1d: zero_ops,
    nn.AdaptiveMaxPool2d: zero_ops,
    nn.AdaptiveMaxPool3d: zero_ops,
    # Average pooling.
    nn.AvgPool1d: count_avgpool,
    nn.AvgPool2d: count_avgpool,
    nn.AvgPool3d: count_avgpool,
    nn.AdaptiveAvgPool1d: count_adap_avgpool,
    nn.AdaptiveAvgPool2d: count_adap_avgpool,
    nn.AdaptiveAvgPool3d: count_adap_avgpool,
    # Fully connected, dropout and containers.
    nn.Linear: count_linear,
    nn.Dropout: zero_ops,
    nn.Sequential: zero_ops
}

def custom_profile(
    model: nn.Module,
    inputs,
):
    """Count operations and parameters of ``model`` for one forward pass.

    Every sub-module receives temporary ``total_ops`` / ``total_params``
    buffers. Modules whose exact type is a key of ``register_hooks`` also get
    forward hooks that fill those buffers while ``model(*inputs)`` runs in
    eval mode under ``torch.no_grad()``.

    Args:
        model: The module to profile.
        inputs: Iterable of positional arguments unpacked into ``model(...)``.

    Returns:
        A ``(total_ops, total_params, layer_info_dict)`` tuple.
        ``layer_info_dict`` maps each child name to
        ``(layer_type_name, op_count, param_count, nested_layer_info_dict)``.

    Any exception raised while profiling propagates to the caller after the
    hooks, the temporary buffers and the training mode have been restored.
    """
    handler_collection = {}
    # Buffers are added to every module, hooked or not, so every module must
    # be remembered for cleanup; otherwise the buffers leak into state_dict().
    instrumented_modules = []
    hook_handles = []

    def add_hooks(m: nn.Module):
        m.register_buffer("total_ops", torch.zeros(1, dtype=torch.float64))
        m.register_buffer("total_params", torch.zeros(1, dtype=torch.float64))
        instrumented_modules.append(m)

        fn = register_hooks.get(type(m))
        if fn is not None:
            handles = (
                m.register_forward_hook(fn),
                m.register_forward_hook(count_parameters),
            )
            handler_collection[m] = handles
            hook_handles.extend(handles)

    def dfs_count(module: nn.Module):
        total_ops, total_params = module.total_ops.item(), 0
        layer_info_dict = {}
        for n, m in module.named_children():
            next_dict = {}
            if m in handler_collection and not isinstance(
                m, (nn.Sequential, nn.ModuleList)
            ):
                m_ops, m_params = m.total_ops.item(), m.total_params.item()
            else:
                m_ops, m_params, next_dict = dfs_count(m)
            # layer_type, mac_count, param_count, next_nested_layer
            layer_info_dict[n] = (type(m).__name__, m_ops, m_params, next_dict)
            total_ops += m_ops
            total_params += m_params
        return total_ops, total_params, layer_info_dict

    prev_training_status = model.training
    model.eval()

    try:
        model.apply(add_hooks)

        with torch.no_grad():
            model(*inputs)

        # Must run before cleanup: it reads the temporary buffers.
        total_ops, total_params, layer_info_dict = dfs_count(model)
    finally:
        # Reset the model to its original status even if profiling failed.
        model.train(prev_training_status)
        for handle in hook_handles:
            handle.remove()
        for m in instrumented_modules:
            # ``None`` default: a module reachable through two parents is
            # visited twice but owns only one set of buffers.
            m._buffers.pop("total_ops", None)
            m._buffers.pop("total_params", None)

    return total_ops, total_params, layer_info_dict

In [71]:
def display_layerwise_mac(layer_names_list, info_dict):
    """Print one line per leaf layer: dotted layer path, layer type and MAC count.

    Args:
        layer_names_list: Path components of the enclosing layers. It is
            extended while descending and restored before returning.
        info_dict: Mapping of layer name to
            ``(layer_type, mac_count, param_count, nested_info_dict)``.
            Entries with a non-empty nested dict are descended into instead
            of being printed.
    """
    if not info_dict:
        return
    for layer_name in info_dict:
        layer_info = info_dict[layer_name]
        nested_info = layer_info[3]
        layer_names_list.append(layer_name)
        if nested_info:
            display_layerwise_mac(layer_names_list, nested_info)
        else:
            layer_type, mac_count = layer_info[0], layer_info[1]
            print(f"{'.'.join(layer_names_list): <50} {layer_type:<30} {mac_count}")
        layer_names_list.pop()

### CNN

In [72]:
class CNN(nn.Module):
    """Three conv/max-pool stages followed by three fully connected layers.

    The flatten step is hard-coded to ``64 * 26 * 26`` features, which is the
    feature-map size produced by a 3 x 224 x 224 input. The final layer has
    53 outputs.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor: 3x3 convolutions, 2x2 max pooling.
        self.conv1 = nn.Conv2d(3, 64, 3)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(64, 64, 3)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(64, 64, 3)
        self.pool3 = nn.MaxPool2d(2, 2)

        # Classifier head.
        self.f1 = nn.Linear(64 * 26 * 26, 128)
        self.f2 = nn.Linear(128, 128)
        self.f3 = nn.Linear(128, 53)

    def forward(self, x):
        """Return the raw 53-way scores for input ``x``."""
        stages = (
            (self.conv1, self.pool1),
            (self.conv2, self.pool2),
            (self.conv3, self.pool3),
        )
        for conv, pool in stages:
            x = pool(F.relu(conv(x)))

        # Flatten every sample to a 64 * 26 * 26 feature vector.
        x = x.view(-1, 26 * 26 * 64)

        for hidden in (self.f1, self.f2):
            x = F.relu(hidden(x))
        return self.f3(x)


In [73]:
# Instantiate the CNN and print its layer structure.
model = CNN()
print(model)

CNN(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (f1): Linear(in_features=43264, out_features=128, bias=True)
  (f2): Linear(in_features=128, out_features=128, bias=True)
  (f3): Linear(in_features=128, out_features=53, bias=True)
)


In [74]:
# Serialize the whole module object (not only its state_dict) to disk.
torch.save(model, 'cnn_v1.pth')

In [75]:
# 'cnn_v1.pth' holds a whole pickled module, so `weights_only=False` must be
# explicit on PyTorch versions where `torch.load` defaults to
# `weights_only=True`. Unpickling can execute arbitrary code: only load
# checkpoint files you trust.
loaded_model = torch.load('cnn_v1.pth', weights_only=False)

  loaded_model = torch.load('cnn_v1.pth')


In [76]:
# Named `sample_input` rather than `input` to avoid shadowing the Python builtin.
sample_input = torch.randn(1, 3, 224, 224)
macs, params = profile(loaded_model, inputs=(sample_input,))
print(f"MAC using built-in function: {macs}")

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
MAC using built-in function: 628385152.0


In [77]:
# Named `sample_input` rather than `input` to avoid shadowing the Python builtin.
sample_input = torch.randn(1, 3, 224, 224)
macs, params, layer_mac_info = custom_profile(loaded_model, inputs=(sample_input,))
print(f"MAC using custom function: {macs}")

MAC using custom function: 628385152.0


In [78]:
# Print the per-layer MAC counts returned by custom_profile, starting from an empty path.
display_layerwise_mac([], layer_mac_info)

conv1                                              Conv2d                         85162752.0
pool1                                              MaxPool2d                      0.0
conv2                                              Conv2d                         437981184.0
pool2                                              MaxPool2d                      0.0
conv3                                              Conv2d                         99680256.0
pool3                                              MaxPool2d                      0.0
f1                                                 Linear                         5537792.0
f2                                                 Linear                         16384.0
f3                                                 Linear                         6784.0


### EfficientNet without Batch Normalization

In [79]:
import torchvision.models as models
from torchvision.models.efficientnet import EfficientNet_B0_Weights

class EfficientNet1(nn.Module):
    """EfficientNet-B0 features with a small classification head (no batch norm in the head).

    The backbone's feature layers are frozen. The head is global average
    pooling -> Linear(1280, 128) -> ReLU -> Linear(128, 53) -> softmax over
    dimension 1.
    """

    def __init__(self):
        super().__init__()
        # Load the EfficientNet-B0 backbone with ImageNet weights. The
        # `pretrained=True` flag is deprecated in torchvision; the `weights`
        # enum requests the same checkpoint explicitly.
        self.base_model = models.efficientnet_b0(
            weights=EfficientNet_B0_Weights.IMAGENET1K_V1
        )
        self.base_model.features.requires_grad_(False)  # Freeze base model layers

        # Custom classification head (this variant has no BatchNorm1d layer).
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Linear(1280, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 53)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return softmax probabilities over the 53 outputs for input ``x``."""
        x = self.base_model.features(x)
        x = self.pooling(x).squeeze(-1).squeeze(-1)  # Global average pooling
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return self.softmax(x)

In [80]:
# Instantiate the head-without-batch-norm variant and print its layer structure.
model = EfficientNet1()
print(model)

EfficientNet1(
  (base_model): EfficientNet(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): Sequential(
        (0): MBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
              (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): SiLU(inplace=True)
            )
            (1): SqueezeExcitation(
              (avgpool): AdaptiveAvgPool2d(output_size=1)
              (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
              (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
              (activation): SiLU(inplace=True)
              (sc



In [81]:
# Serialize the whole module object, then load it back.
torch.save(model, 'EfficientNet_v1.pth')
# A fully pickled module needs `weights_only=False` on PyTorch versions where
# `torch.load` defaults to `weights_only=True`. Unpickling can execute
# arbitrary code: only load checkpoint files you trust.
loaded_model = torch.load('EfficientNet_v1.pth', weights_only=False)

  loaded_model = torch.load('EfficientNet_v1.pth')


In [82]:
# Named `sample_input` rather than `input` to avoid shadowing the Python builtin.
sample_input = torch.randn(1, 3, 224, 224)
macs, params = profile(loaded_model, inputs=(sample_input,))
print(f"MAC using builtin function: {macs}")

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register count_adap_avgpool() for <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register count_softmax() for <class 'torch.nn.modules.activation.Softmax'>.
MAC using builtin function: 414035806.0


In [83]:
# Named `sample_input` rather than `input` to avoid shadowing the Python builtin.
sample_input = torch.randn(1, 3, 224, 224)
macs, params, layer_mac_info = custom_profile(loaded_model, inputs=(sample_input,))
print(f"MAC using custom function: {macs}")

MAC using custom function: 414035806.0


In [84]:
# Print the per-layer MAC counts returned by custom_profile, starting from an empty path.
display_layerwise_mac([], layer_mac_info)

base_model.features.0.0                            Conv2d                         10838016.0
base_model.features.0.1                            BatchNorm2d                    1605632.0
base_model.features.0.2                            SiLU                           0.0
base_model.features.1.0.block.0.0                  Conv2d                         3612672.0
base_model.features.1.0.block.0.1                  BatchNorm2d                    1605632.0
base_model.features.1.0.block.0.2                  SiLU                           0.0
base_model.features.1.0.block.1.avgpool            AdaptiveAvgPool2d              401440.0
base_model.features.1.0.block.1.fc1                Conv2d                         256.0
base_model.features.1.0.block.1.fc2                Conv2d                         256.0
base_model.features.1.0.block.1.activation         SiLU                           0.0
base_model.features.1.0.block.1.scale_activation   Sigmoid                        0.0
base_model.features.

### EfficientNet with Batch Normalization

In [85]:
import torchvision.models as models
from torchvision.models.efficientnet import EfficientNet_B0_Weights

class EfficientNet2(nn.Module):
    """EfficientNet-B0 features with a small classification head that includes batch norm.

    The backbone's feature layers are frozen. The head is global average
    pooling -> Linear(1280, 128) -> BatchNorm1d(128) -> ReLU ->
    Linear(128, 53) -> softmax over dimension 1.
    """

    def __init__(self):
        super().__init__()
        # Load the EfficientNet-B0 backbone with ImageNet weights. The
        # `pretrained=True` flag is deprecated in torchvision; the `weights`
        # enum requests the same checkpoint explicitly.
        self.base_model = models.efficientnet_b0(
            weights=EfficientNet_B0_Weights.IMAGENET1K_V1
        )
        self.base_model.features.requires_grad_(False)  # Freeze base model layers

        # Custom classification head. Attribute order is kept as-is because it
        # determines the order of entries in the printed module and state_dict.
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.batch_norm = nn.BatchNorm1d(128)
        self.fc1 = nn.Linear(1280, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 53)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return softmax probabilities over the 53 outputs for input ``x``."""
        x = self.base_model.features(x)
        x = self.pooling(x).squeeze(-1).squeeze(-1)  # Global average pooling
        x = self.fc1(x)
        x = self.batch_norm(x)
        x = self.relu(x)
        x = self.fc2(x)
        return self.softmax(x)

In [86]:
# Instantiate the head-with-batch-norm variant and print its layer structure.
model = EfficientNet2()
print(model)

EfficientNet2(
  (base_model): EfficientNet(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): Sequential(
        (0): MBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
              (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): SiLU(inplace=True)
            )
            (1): SqueezeExcitation(
              (avgpool): AdaptiveAvgPool2d(output_size=1)
              (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
              (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
              (activation): SiLU(inplace=True)
              (sc

In [87]:
# Serialize the whole module object (not only its state_dict) to disk.
torch.save(model, 'EfficientNet_v2.pth')

In [88]:
# 'EfficientNet_v2.pth' holds a whole pickled module, so `weights_only=False`
# must be explicit on PyTorch versions where `torch.load` defaults to
# `weights_only=True`. Unpickling can execute arbitrary code: only load
# checkpoint files you trust.
loaded_model = torch.load('EfficientNet_v2.pth', weights_only=False)

  loaded_model = torch.load('EfficientNet_v2.pth')


In [89]:
# Named `sample_input` rather than `input` to avoid shadowing the Python builtin.
sample_input = torch.randn(1, 3, 224, 224)
macs, params = profile(loaded_model, inputs=(sample_input,))
print(f"MAC using builtin function: {macs}")

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register count_adap_avgpool() for <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm1d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register count_softmax() for <class 'torch.nn.modules.activation.Softmax'>.
MAC using builtin function: 414036318.0


In [90]:
# Named `sample_input` rather than `input` to avoid shadowing the Python builtin.
sample_input = torch.randn(1, 3, 224, 224)
macs, params, layer_mac_info = custom_profile(loaded_model, inputs=(sample_input,))
print(f"MAC using custom function: {macs}")

MAC using custom function: 414036318.0


In [91]:
# Print the per-layer MAC counts returned by custom_profile, starting from an empty path.
display_layerwise_mac([], layer_mac_info)

base_model.features.0.0                            Conv2d                         10838016.0
base_model.features.0.1                            BatchNorm2d                    1605632.0
base_model.features.0.2                            SiLU                           0.0
base_model.features.1.0.block.0.0                  Conv2d                         3612672.0
base_model.features.1.0.block.0.1                  BatchNorm2d                    1605632.0
base_model.features.1.0.block.0.2                  SiLU                           0.0
base_model.features.1.0.block.1.avgpool            AdaptiveAvgPool2d              401440.0
base_model.features.1.0.block.1.fc1                Conv2d                         256.0
base_model.features.1.0.block.1.fc2                Conv2d                         256.0
base_model.features.1.0.block.1.activation         SiLU                           0.0
base_model.features.1.0.block.1.scale_activation   Sigmoid                        0.0
base_model.features.