In [85]:
!pip3 install keras
!pip3 install tqdm
!pip install tqdm
# !conda install keras
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import keras

from keras.applications.vgg19 import VGG19
from keras.applications.resnet50 import ResNet50
from keras.applications.mobilenet import MobileNet
from keras.applications.inception_v3 import InceptionV3

from keras.models import Model
from keras.layers import Dense, Dropout, Flatten

import os
from tqdm import tqdm
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

[31mdistributed 1.21.8 requires msgpack, which is not installed.[0m
[33mYou are using pip version 10.0.1, however version 18.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [86]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

In [87]:
# Input Shape
rows = 224
cols = 224
chan = 3

In [88]:
model_vgg19        = VGG19(include_top = False, input_shape=(rows, cols, chan))
model_resnet50     = ResNet50(include_top = False, input_shape=(rows, cols, chan))
model_mobilenet    = MobileNet(weights = None, include_top = False, input_shape=(rows, cols, chan))
model_inception_v3 = InceptionV3(include_top = False, input_shape=(rows, cols, chan))



In [104]:
# Hardware Model
memory_bandwidth  = 10e9 # B/sec (GB)
internal_mem_size = 16e6 # Bytes (MB)
mnk = (32, 32, 32)       # mat_mul primitive size in BLAS notation
parallel_matmuls = 1     # number of matrix multiplication primitives operating in parallel
matmul_rate = 1e9 / 32   # number of matrix multiplication primitive completions per second
vec_prim_size = 32       # vector primitive size Nx1
parallel_vec_prims = 1   # number of vector primitives operating in parallel
vec_prim_rate = 1e9      # number of vector primitive completions per second

In [105]:
layer_names = []
layer_types = []

Ni = []
Lr = []
Lc = []

No = []
Mr = []
Mc = []

Fr = []
Fc = []
Sr = []
Sc = []

f_mem = []

# first we loop through the model and grab information
# from model

# model = model_mobilenet
model = model_vgg19
# model = model_resnet50
# model = model_inception_v3

for layer in model.layers:
    # create a list of layer types
    layer_type = layer.__class__.__name__
    layer_types.append(layer_type)
    
    # create a list of layer names
    layer_names.append(layer.name)
    
    # create lists for Ni, Lr, Lc, No, Mr, Mc
    if layer.__class__.__name__ in ['Concatenate', 'Add']:
        # these types of layers take in multiple shapes. For now
        # we can just model as the output shape
        Lr.append(layer.output_shape[1]) # rows
        Lc.append(layer.output_shape[2]) # columns
        Ni.append(layer.output_shape[3]) # channels
        Mr.append(layer.output_shape[1]) # rows
        Mc.append(layer.output_shape[2]) # columns
        No.append(layer.output_shape[3]) # channels
    else:
        # create a list of layer inputs Ni, Lr, Lc
        if (len(layer.input_shape) == 2):
            Lr.append(layer.input_shape[1]) # rows
            Lc.append(1)
            Ni.append(1)
        elif (len(layer.input_shape) == 3):
            Lr.append(layer.input_shape[1]) # rows
            Lc.append(layer.input_shape[2]) # columns
            Ni.append(1)
        else:
            Lr.append(layer.input_shape[1]) # rows
            Lc.append(layer.input_shape[2]) # columns
            Ni.append(layer.input_shape[3]) # channels

        # create lists for layer outputs No, Mr, Mc
        if (len(layer.output_shape) == 2):
            Mr.append(layer.output_shape[1]) # rows
            Mc.append(1)
            No.append(1)
        elif (len(layer.output_shape) == 3):
            Mr.append(layer.output_shape[1]) # rows
            Mc.append(layer.output_shape[2]) # columns
            No.append(1)
        else:
            Mr.append(layer.output_shape[1]) # rows
            Mc.append(layer.output_shape[2]) # columns
            No.append(layer.output_shape[3]) # channels
    
    
    # create lists for filters: Fr, Fc, Sr, Sc
    if 'filters' in layer.get_config():
        Fr.append(layer.kernel_size[0])
        Fc.append(layer.kernel_size[1])
        Sr.append(layer.strides[0])
        Sc.append(layer.strides[1])
    else:
        Fr.append(1)
        Fc.append(1)
        Sr.append(1)
        Sc.append(1)
        
    # determine filter memory
    f_mem.append(layer.count_params())
    

In [106]:
in_mem = []
in_move = []
in_us = []
out_mem = []
out_move = []
f_move = []
f_us = []
f_group = []
bandwidth = []
in_move_time = []
out_move_time = []
out_ds = []
f_move_time = []
tot_move_time = []
M = []
N = []
K = []
MACS = []
MACS_per_cycle = []
time_ser = []
time_par = []
matrix_compute = []
vector_compute = []
total_compute = []

for i, layer in enumerate(layer_names):

    # append memory of input tensor to in_mem
    in_mem.append(Ni[i]*Lr[i]*Lc[i])
    
    # see if input tensor fits on internal memory
    if in_mem[0] > internal_mem_size:
        in_move.append(1)
    else: in_move.append(0)
        
    # in upsampling will be not be included in any of these models
    in_us.append(1)
    
    # append memory of output tensor to out_mem
    out_mem.append(No[i]*Mr[i]*Mc[i])
    
    # see if output tensor fits on internal memory
    if out_mem[0] > internal_mem_size:
        out_move.append(1)
    else: out_move.append(0)
       
    # filter coefficients will always be off device
    f_move.append(1)
    
    # filter grouping
    f_group.append(Ni[i])
    
    # filter upsampling
    f_us.append(1)
    
    # determine output feature map downsampling ratio
    # We can assume it's simply the stride rate, 
    # and that the stride rate will be the same for row and column
    out_ds.append(Sr[i]) 
    
    # determine bandwidth
    bandwidth.append((in_mem[i]*in_move[i]) + (out_mem[i]*out_move[i]) + (f_mem[i]*f_move[i]))
    
    # in, out, filter movement times
    in_move_time.append((in_mem[i]*in_move[i]) / memory_bandwidth)
    out_move_time.append((out_mem[i]*out_move[i]) / memory_bandwidth)
    f_move_time.append((f_mem[i]*f_move[i]) / memory_bandwidth)
    
    # total movement time
    tot_move_time.append(bandwidth[i] / memory_bandwidth)
    
    # M, N, K
    M.append(No[i])
    N.append(Lr[i] * Lc[i])
    K.append(Ni[i]*Fr[i]*Fc[i])
    
    # MACs
    MACS.append(M[i] * N[i] * K[i])
    
    # Compute Times
    
    # CNN Style Convolution Compute Time
    # based on hardware matrix compute parameters
    if layer_types[i] in ['Conv2D','DepthwiseConv2D']:
        MACS_per_cycle = mnk[0]*mnk[1]*mnk[2] # matmul rate has a /32 built in so we need all three
        matrix_compute.append(((MACS[i] / MACS_per_cycle) / matmul_rate) / parallel_matmuls)
        vector_compute.append(0)
        total_compute.append(matrix_compute[i])
    elif layer_types[i] in ['ReLU', 'Add']: 
        # Bias and RELU are free
        matrix_compute.append(0)
        vector_compute.append(0)
        total_compute.append(0)
    else: #Pooling included here
        matrix_compute.append(0)
        vector_compute.append(((max(out_mem[i], in_mem[i]) / vec_prim_size) / vec_prim_rate) / parallel_vec_prims )
        total_compute.append(vector_compute[i])
        
        
    # Time Serial
    time_ser.append(tot_move_time[i] + total_compute[i])
    
    # Time Parallel
    time_par.append(max(tot_move_time[i], total_compute[i]))

In [107]:
# Create a summation row for the csv
in_move_time_sum = sum(in_move_time)
f_move_time_sum = sum(f_move_time)
out_move_time_sum = sum(out_move_time)
tot_move_time_sum = sum(tot_move_time)
matrix_compute_sum = sum(matrix_compute)
vector_compute_sum = sum(vector_compute)
total_compute_sum = sum(total_compute)
time_ser_sum = sum(time_ser)
time_par_sum = sum(time_par)

In [108]:
data_tuples = list(zip(
    layer_names,
    layer_types, 
    Ni, 
    Lr, 
    Lc,
    in_mem,
    in_move,
    in_us,
    No, 
    Mr, 
    Mc,
    out_mem,
    out_move,
    out_ds,
    Fr, 
    Fc, 
    Sr, 
    Sc,
    f_us,
    f_group,
    f_mem,
    f_move,
    bandwidth,
    in_move_time,
    out_move_time,
    f_move_time,
    tot_move_time,
    M, 
    N,
    K,
    MACS,
    time_ser,
    time_par,
    matrix_compute,
    vector_compute,
    total_compute))

df1 = pd.DataFrame(data_tuples, columns=[
    'Layer Names',
    'Layer Types', 
    'Ni', 
    'Lr', 
    'Lc',
    'Input Mem',
    'Input Move?',
    'Input Upsampling Ratio',
    'No', 
    'Mr', 
    'Mc',
    'Output Mem',
    'Output Move?',
    'Output Downsampling Ratio',
    'Fr', 
    'Fc', 
    'Sr', 
    'Sc',
    'Filter Upsampling Ratio',
    'Filter Grouping', # ??? Different from Ni?
    'Filter Mem',
    'Filter Move?',
    'Bandwidth',
    'Input Move',
    'Output Move',
    'Filter Move',
    'Total Move Time',
    'M', 
    'N',
    'K',
    'MACS',
    'Serial Time',
    'Parallel Time',
    'Matrix Compute',
    'Vector Compute',
    'Total Compute'])

# df.style.apply(highlight_max, subset=['M', 'N', 'K'])
df1

Unnamed: 0,Layer Names,Layer Types,Ni,Lr,Lc,Input Mem,Input Move?,Input Upsampling Ratio,No,Mr,Mc,Output Mem,Output Move?,Output Downsampling Ratio,Fr,Fc,Sr,Sc,Filter Upsampling Ratio,Filter Grouping,Filter Mem,Filter Move?,Bandwidth,Input Move,Output Move,Filter Move,Total Move Time,M,N,K,MACS,Serial Time,Parallel Time,Matrix Compute,Vector Compute,Total Compute
0,input_15,InputLayer,3,224,224,150528,0,1,3,224,224,150528,0,1,1,1,1,1,1,3,0,1,0,0.0,0.0,0.0,0.0,3,50176,3,451584,4.704e-06,4.704e-06,0.0,4.704e-06,4.704e-06
1,conv1_pad,ZeroPadding2D,3,224,224,150528,0,1,3,225,225,151875,0,1,1,1,1,1,1,3,0,1,0,0.0,0.0,0.0,0.0,3,50176,3,451584,4.746094e-06,4.746094e-06,0.0,4.746094e-06,4.746094e-06
2,conv1,Conv2D,3,225,225,151875,0,1,32,112,112,401408,0,2,3,3,2,2,1,3,864,1,864,0.0,0.0,8.64e-08,8.64e-08,32,50625,27,43740000,4.280124e-05,4.271484e-05,4.3e-05,0.0,4.271484e-05
3,conv1_bn,BatchNormalization,32,112,112,401408,0,1,32,112,112,401408,0,1,1,1,1,1,1,32,128,1,128,0.0,0.0,1.28e-08,1.28e-08,32,12544,32,12845056,1.25568e-05,1.2544e-05,0.0,1.2544e-05,1.2544e-05
4,conv1_relu,ReLU,32,112,112,401408,0,1,32,112,112,401408,0,1,1,1,1,1,1,32,0,1,0,0.0,0.0,0.0,0.0,32,12544,32,12845056,0.0,0.0,0.0,0.0,0.0
5,conv_dw_1,DepthwiseConv2D,32,112,112,401408,0,1,32,112,112,401408,0,1,1,1,1,1,1,32,288,1,288,0.0,0.0,2.88e-08,2.88e-08,32,12544,32,12845056,1.25728e-05,1.2544e-05,1.3e-05,0.0,1.2544e-05
6,conv_dw_1_bn,BatchNormalization,32,112,112,401408,0,1,32,112,112,401408,0,1,1,1,1,1,1,32,128,1,128,0.0,0.0,1.28e-08,1.28e-08,32,12544,32,12845056,1.25568e-05,1.2544e-05,0.0,1.2544e-05,1.2544e-05
7,conv_dw_1_relu,ReLU,32,112,112,401408,0,1,32,112,112,401408,0,1,1,1,1,1,1,32,0,1,0,0.0,0.0,0.0,0.0,32,12544,32,12845056,0.0,0.0,0.0,0.0,0.0
8,conv_pw_1,Conv2D,32,112,112,401408,0,1,64,112,112,802816,0,1,1,1,1,1,1,32,2048,1,2048,0.0,0.0,2.048e-07,2.048e-07,64,12544,32,25690112,2.52928e-05,2.5088e-05,2.5e-05,0.0,2.5088e-05
9,conv_pw_1_bn,BatchNormalization,64,112,112,802816,0,1,64,112,112,802816,0,1,1,1,1,1,1,64,256,1,256,0.0,0.0,2.56e-08,2.56e-08,64,12544,64,51380224,2.51136e-05,2.5088e-05,0.0,2.5088e-05,2.5088e-05


In [109]:
df1.to_csv('vgg19_by_layer.csv')

In [110]:
data_tuples = [[in_move_time_sum, f_move_time_sum, out_move_time_sum, tot_move_time_sum,
        matrix_compute_sum, vector_compute_sum, total_compute_sum, time_ser_sum, time_par_sum]]

df2 = pd.DataFrame(data_tuples, columns = ('Total Input Move Time',  'Total Filter Move Time', 
                                         'Total Output Move Time', 'Total Move Time', 
                                         'Total Matrix Compute time',  'Total Vector Compute Time',
                                         'Total Compute Time', 'Total Serial Time',  'Total Parallel Time'))
df2

Unnamed: 0,Total Input Move Time,Total Filter Move Time,Total Output Move Time,Total Move Time,Total Matrix Compute time,Total Vector Compute Time,Total Compute Time,Total Serial Time,Total Parallel Time
0,0.0,0.000323,0.0,0.000323,0.001198,0.000216,0.001414,0.001737,0.001496


In [111]:
df2.to_csv('vgg19_summations.csv')