# Operational Intensity Analysis Tool

In [10]:
import numpy as np
from keras.applications import VGG16, MobileNet, Xception, ResNet50

In [2]:
def get_kernel_size(layer):
    layer_type = layer.__class__.__name__
    if 'Conv2D' in layer_type or 'Dense' in layer_type:
        weights = layer.get_weights()
        return weights[0].shape
    else:
        return 0

In [3]:
def getFLOPS(layer, kernel, output):
    # For Xception
    if "Separable" in layer:
        return np.prod(output) * (np.prod(kernel[:-2]) + kernel[-2])
    # For MobileNet
    elif "Depthwise" in layer:
        return np.prod(output) * np.prod(kernel[:-2])
    # Regular Convolution
    elif "Conv2D" in layer or "Dense" in layer:
        return np.prod(output) * np.prod(kernel[:-1])
    else:
        return 0

In [4]:
def intensity(model):
    print('Layer Name | Layer Type | Kernel Size | Kernel Mem | Output Size | Output Mem | FLOPS')
    print('--- | --- | --- | --- | --- | --- | ---')
    sum_kernel_mem = 0
    sum_output_mem = 0
    sum_flops = 0
    for l in model.layers:
        layer_type = l.__class__.__name__
        kernel_size = get_kernel_size(l)
        kernel_mem = np.prod(kernel_size)
        output_size = l.output_shape[1:]
        output_mem = np.prod(output_size)
        flops = getFLOPS(layer_type, kernel_size, output_size)
        print(l.name, '|', layer_type, '|', 
              kernel_size, '|', "{:,}".format(kernel_mem), '|', 
              output_size, '|', "{:,}".format(output_mem), '|', 
              "{:,}".format(flops))
        sum_kernel_mem += kernel_mem
        sum_output_mem += output_mem
        sum_flops += flops
    print('- | - | - | - | - | - | -')
    print('- | - | - | Kernel Mem (Total) | - | Output Mem (Total) | FLOPS (Total)')
    print('Summary | - | - |', 
          "{:,}".format(sum_kernel_mem), '| - |', 
          "{:,}".format(sum_output_mem), '|', 
          "{:,}".format(sum_flops))
    print('\n----------------------\n')
    print('Model Name: %s' % model.name)
    print('Overall FLOPS: %.f MFLOPS' % (sum_flops/1024/1024))
    print('Overall Memory: %.f MB' % ((sum_kernel_mem + sum_output_mem)/1024/1024))
    # Default dtype is float32 (4Byte)
    op_intensity = sum_flops / ((sum_kernel_mem + sum_output_mem) * 4)
    print('Operational Intensity = %.f FLOPS/Byte' % op_intensity)

In [5]:
intensity(VGG16())

Layer Name | Layer Type | Kernel Size | Kernel Mem | Output Size | Output Mem | FLOPS
--- | --- | --- | --- | --- | --- | ---
input_1 | InputLayer | 0 | 0 | (224, 224, 3) | 150,528 | 0
block1_conv1 | Conv2D | (3, 3, 3, 64) | 1,728 | (224, 224, 64) | 3,211,264 | 86,704,128
block1_conv2 | Conv2D | (3, 3, 64, 64) | 36,864 | (224, 224, 64) | 3,211,264 | 1,849,688,064
block1_pool | MaxPooling2D | 0 | 0 | (112, 112, 64) | 802,816 | 0
block2_conv1 | Conv2D | (3, 3, 64, 128) | 73,728 | (112, 112, 128) | 1,605,632 | 924,844,032
block2_conv2 | Conv2D | (3, 3, 128, 128) | 147,456 | (112, 112, 128) | 1,605,632 | 1,849,688,064
block2_pool | MaxPooling2D | 0 | 0 | (56, 56, 128) | 401,408 | 0
block3_conv1 | Conv2D | (3, 3, 128, 256) | 294,912 | (56, 56, 256) | 802,816 | 924,844,032
block3_conv2 | Conv2D | (3, 3, 256, 256) | 589,824 | (56, 56, 256) | 802,816 | 1,849,688,064
block3_conv3 | Conv2D | (3, 3, 256, 256) | 589,824 | (56, 56, 256) | 802,816 | 1,849,688,064
block3_pool | MaxPooling2D | 0 | 0 |

In [6]:
intensity(MobileNet())

Layer Name | Layer Type | Kernel Size | Kernel Mem | Output Size | Output Mem | FLOPS
--- | --- | --- | --- | --- | --- | ---
input_2 | InputLayer | 0 | 0 | (224, 224, 3) | 150,528 | 0
conv1 | Conv2D | (3, 3, 3, 32) | 864 | (112, 112, 32) | 401,408 | 10,838,016
conv1_bn | BatchNormalization | 0 | 0 | (112, 112, 32) | 401,408 | 0
conv1_relu | Activation | 0 | 0 | (112, 112, 32) | 401,408 | 0
conv_dw_1 | DepthwiseConv2D | (3, 3, 32, 1) | 288 | (112, 112, 32) | 401,408 | 3,612,672
conv_dw_1_bn | BatchNormalization | 0 | 0 | (112, 112, 32) | 401,408 | 0
conv_dw_1_relu | Activation | 0 | 0 | (112, 112, 32) | 401,408 | 0
conv_pw_1 | Conv2D | (1, 1, 32, 64) | 2,048 | (112, 112, 64) | 802,816 | 25,690,112
conv_pw_1_bn | BatchNormalization | 0 | 0 | (112, 112, 64) | 802,816 | 0
conv_pw_1_relu | Activation | 0 | 0 | (112, 112, 64) | 802,816 | 0
conv_dw_2 | DepthwiseConv2D | (3, 3, 64, 1) | 576 | (56, 56, 64) | 200,704 | 1,806,336
conv_dw_2_bn | BatchNormalization | 0 | 0 | (56, 56, 64) | 200,704

In [9]:
intensity(Xception(input_shape=(299, 299, 3)))

Layer Name | Layer Type | Kernel Size | Kernel Mem | Output Size | Output Mem | FLOPS
--- | --- | --- | --- | --- | --- | ---
input_4 | InputLayer | 0 | 0 | (299, 299, 3) | 268,203 | 0
block1_conv1 | Conv2D | (3, 3, 3, 32) | 864 | (149, 149, 32) | 710,432 | 19,181,664
block1_conv1_bn | BatchNormalization | 0 | 0 | (149, 149, 32) | 710,432 | 0
block1_conv1_act | Activation | 0 | 0 | (149, 149, 32) | 710,432 | 0
block1_conv2 | Conv2D | (3, 3, 32, 64) | 18,432 | (147, 147, 64) | 1,382,976 | 398,297,088
block1_conv2_bn | BatchNormalization | 0 | 0 | (147, 147, 64) | 1,382,976 | 0
block1_conv2_act | Activation | 0 | 0 | (147, 147, 64) | 1,382,976 | 0
block2_sepconv1 | SeparableConv2D | (3, 3, 64, 1) | 576 | (147, 147, 128) | 2,765,952 | 201,914,496
block2_sepconv1_bn | BatchNormalization | 0 | 0 | (147, 147, 128) | 2,765,952 | 0
block2_sepconv2_act | Activation | 0 | 0 | (147, 147, 128) | 2,765,952 | 0
block2_sepconv2 | SeparableConv2D | (3, 3, 128, 1) | 1,152 | (147, 147, 128) | 2,765,952 

In [11]:
intensity(ResNet50())

Layer Name | Layer Type | Kernel Size | Kernel Mem | Output Size | Output Mem | FLOPS
--- | --- | --- | --- | --- | --- | ---
input_5 | InputLayer | 0 | 0 | (224, 224, 3) | 150,528 | 0
conv1 | Conv2D | (7, 7, 3, 64) | 9,408 | (112, 112, 64) | 802,816 | 118,013,952
bn_conv1 | BatchNormalization | 0 | 0 | (112, 112, 64) | 802,816 | 0
activation_1 | Activation | 0 | 0 | (112, 112, 64) | 802,816 | 0
max_pooling2d_1 | MaxPooling2D | 0 | 0 | (55, 55, 64) | 193,600 | 0
res2a_branch2a | Conv2D | (1, 1, 64, 64) | 4,096 | (55, 55, 64) | 193,600 | 12,390,400
bn2a_branch2a | BatchNormalization | 0 | 0 | (55, 55, 64) | 193,600 | 0
activation_2 | Activation | 0 | 0 | (55, 55, 64) | 193,600 | 0
res2a_branch2b | Conv2D | (3, 3, 64, 64) | 36,864 | (55, 55, 64) | 193,600 | 111,513,600
bn2a_branch2b | BatchNormalization | 0 | 0 | (55, 55, 64) | 193,600 | 0
activation_3 | Activation | 0 | 0 | (55, 55, 64) | 193,600 | 0
res2a_branch2c | Conv2D | (1, 1, 64, 256) | 16,384 | (55, 55, 256) | 774,400 | 49,561,6