In [1]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
from torch import nn

from thop import clever_format
from thop import profile


In [13]:
def _save_bar_chart(series, title, out_path):
    """Draw one log-scale, per-layer bar chart, save it to ``out_path`` and close it.

    Args:
        series: list of ``(values, label)`` pairs. The series are drawn side
            by side around each layer index and share a total width of 0.5.
            A ``None`` label means the series is unlabeled; the legend is only
            drawn when at least one series has a label.
        title: chart title.
        out_path: file path the figure is written to.
    """
    fig = plt.figure(figsize=(20, 10))
    try:
        ax = fig.add_subplot(111)
        bar_width = 0.5 / len(series)
        for idx, (values, label) in enumerate(series):
            # Centre the group of bars on the integer layer index.
            offset = (idx - (len(series) - 1) / 2) * bar_width
            # Only forward `label` when one is given; bar() should not receive None.
            label_kwargs = {} if label is None else {'label': label}
            ax.bar(np.arange(len(values)) + offset, values, width=bar_width, **label_kwargs)
        ax.set_yscale('log')
        ax.set_title(title, fontsize=35)
        ax.tick_params(axis="x", labelsize=35)
        ax.tick_params(axis="y", labelsize=35)
        ax.set_xlabel('layer idx', fontsize=35)
        ax.grid()
        if any(label is not None for _, label in series):
            ax.legend(fontsize=25)
        fig.savefig(out_path)
    finally:
        # Release the figure so repeated calls do not pile up open figures.
        plt.close(fig)


def complexity_bar_graph(per_layer_complexity_data, file_prefix, plot=0):
    """Save per-layer complexity bar charts as PNG files.

    Args:
        per_layer_complexity_data: iterable of indexable per-layer records.
            Indices 1..6 are read as MACs, parameter count, activation count,
            DP count, weight-reuse factor and input-reuse factor. Index 0 is
            not used here (presumably the layer name - TODO confirm).
        file_prefix: string prepended to each output file name
            (``act.png``, ``weights.png``, ``dp.png``, ``mac.png``).
        plot: charts are only produced when this equals 1; with the default 0
            the records are read (and thereby validated) but nothing is drawn.

    Returns:
        None.

    Raises:
        IndexError: if a record has fewer than seven entries.
    """
    # Materialise once so that one-shot iterables (generators) are supported.
    layers = list(per_layer_complexity_data)
    mac_data = [layer[1] for layer in layers]
    params_data = [layer[2] for layer in layers]
    act_data = [layer[3] for layer in layers]
    dp_data = [layer[4] for layer in layers]
    weight_reuse = [layer[5] for layer in layers]
    input_reuse = [layer[6] for layer in layers]

    if plot != 1:
        return

    _save_bar_chart(
        [(act_data, 'activation'), (input_reuse, 'data reuse')],
        'number of activation per layer and input reuse factor',
        file_prefix + 'act.png',
    )
    _save_bar_chart(
        [(params_data, 'weights'), (weight_reuse, 'weight reuse')],
        'number of weights per layer and weight reuse factor',
        file_prefix + 'weights.png',
    )
    _save_bar_chart([(dp_data, None)], 'number of DP per layer', file_prefix + 'dp.png')
    _save_bar_chart([(mac_data, None)], 'number of MAC per layer', file_prefix + 'mac.png')


def profile_model(model, file_prefix, plot=0):
    """Profile ``model`` on one random 1x3x224x224 input and print the totals.

    The imported ``profile`` is unpacked into five values here (MACs,
    parameters, activations, DPs and a per-layer record list).
    NOTE(review): this looks like a customised thop build - confirm that the
    installed ``profile`` really returns five values.

    Args:
        model: the network to profile; it is passed straight to ``profile``.
        file_prefix: prefix for the chart file names, forwarded to
            ``complexity_bar_graph``.
        plot: forwarded to ``complexity_bar_graph``; charts are written only
            when it equals 1. The default 0 produces no files.

    Returns:
        None. The formatted totals are printed to stdout.
    """
    # Named `dummy_input` rather than `input` to avoid shadowing the builtin.
    dummy_input = torch.randn(1, 3, 224, 224)

    macs, params, num_act, num_dp, per_compute_layer_complexity = profile(
        model, inputs=(dummy_input,)
    )
    macs, params, num_act, num_dp = clever_format([macs, params, num_act, num_dp], "%.3f")

    print(
        'activations:', num_act,
        'weight:', params,
        'num_dp:', num_dp,
        'macs:', macs
    )

    # Reuse the shared per-layer extraction/plotting routine instead of keeping
    # a second copy of its loop here.
    complexity_bar_graph(per_compute_layer_complexity, file_prefix, plot)


In [27]:
#if __name__ == '__main__':
    # model = torchvision.models.resnet18()
    # file_prefix = 'resnet18_'
    # model = torchvision.models.vgg11()
    # file_prefix = 'vgg11_'
    # model = torchvision.models.vgg16()
    # file_prefix = 'vgg16_'
    # model = Net(10)
    # file_prefix='alex_cifar_'
    
    
"""
NOTE: I'm fairly sure that 'act:' (activations) in the output refers to input activations,
i.e. the number of values going into the layer. The first layer confirms this: 224 x 224 x 3 = 150528.
"""
# Profile torchvision's AlexNet, then inspect how the first feature layers
# change the tensor shape.
model = torchvision.models.alexnet()
file_prefix = 'alexnet_'
print("Alexnet:")

profile_model(model, file_prefix)

# Print the output shape after each of the first three layers of
# `model.features`. Each layer's output is fed into the next one, so every
# layer runs exactly once instead of being recomputed for each print.
sample = torch.randn(1, 3, 224, 224)
activation = sample
for layer in model.features[:3]:
    activation = layer(activation)
    print(activation.shape)
print(model)

Alexnet:
[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[91m[WARN] Cannot find rule for <class 'torch.nn.modules.container.Sequential'>. Treat it as zero Macs and zero Params.[00m
[INFO] Register count_adap_avgpool() for <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[91m[WARN] Cannot find rule for <class 'torchvision.models.alexnet.AlexNet'>. Treat it as zero Macs and zero Params.[00m
		 Conv2d (ops: 70276800.0 params: 23296.0 act: 150528.0 dp: 193600.0 )
		 ReLU (ops: 0.0 params: 0.0 act: 0.0 dp: 0.0 )
		 MaxPool2d (ops: 0.0 params: 0.0 act: 0.0 dp: 0.0 )
		 Conv2d (ops: 223948800.0 params: 307392.0 act: 46656.0 dp: 139968.0 )
		 ReLU (ops: 0.0 pa

In [17]:
# Profile torchvision's ResNet-18 and print its layer structure.
# Other models can be swapped in here, e.g. torchvision.models.vgg11() with
# prefix 'vgg11_' or torchvision.models.vgg16() with prefix 'vgg16_'.
file_prefix = 'resnet18_'
model = torchvision.models.resnet18()

profile_model(model, file_prefix)
print(model)

"""
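Input is 3x224x224.
First layer: (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
Q: Shouldn't it be 3x64 = 192 weights? How are there 9408 weights?
A: Each of the 3x64 input/output channel pairs has its own 7x7 kernel, so 3 x 64 x 7 x 7 = 9408 (there is no bias).
Q: Output activations should be 224x224x64 / 4 (4 for the stride) = 802816; divide by 4 again and you get 200,704, which is correct, but why divide by 16 if the stride is only (2, 2)?
A: conv1 alone gives 112x112x64 = 802816, matching 'dp: 802816.0' in the profiler output. The second factor of 4 presumably comes from the stride-2 MaxPool2d that follows, which yields 56x56x64 = 200704.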
"""

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_bn() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[91m[WARN] Cannot find rule for <class 'torchvision.models.resnet.BasicBlock'>. Treat it as zero Macs and zero Params.[00m
[91m[WARN] Cannot find rule for <class 'torch.nn.modules.container.Sequential'>. Treat it as zero Macs and zero Params.[00m
[INFO] Register count_adap_avgpool() for <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[91m[WARN] Cannot find rule for <class 'torchvision.models.resnet.ResNet'>. Treat it as zero Macs and zero Params.[00m
	 Conv2d (ops: 118013952.0 params: 9408.0 act: 150528.0 dp: 802816.0 )
	 BatchNorm2d (ops: 0.0 params: 128.0 act: 0.0 dp: 0.0 )
	 ReLU (ops: 0.0 par

"\nFor the first layer, shouldn't it be 3x64 = 192 weights? how are there 9408 weights? \nactivations, should be 224x224x3\n"