In [68]:
########################################################################################################################
# Performance Prediction Tool
# 
# Input
#  Network specification from high level library
#  Parameterized hardware model
# 
# Output
#  Performance prediction
#  Per layer
#  Total network
#
# Resource Used:-
#   https://github.com/tensorflow/models/tree/master/research/slim 
#
# Written By:-
#  Aadish Joshi
#
# In Collaboration with:
#  Piyush Mahatkar
#  Varun Parashar
########################################################################################################################

import tensorflow as tf
import csv
from nets import inception_v3
from nets import vgg
from nets import mobilenet_v1
from nets import resnet_v1

In [69]:
########################################################################################################################
# Hardware Model Settings
########################################################################################################################

# Parameterized hardware model. Later cells look these values up by the exact
# key strings below, so the spelling of every key is significant.
settings = {'Hardware': {}}

# Memory system: link bandwidth and on-chip capacity.
settings['Hardware']['memory'] = {
    'ie-bandwidth': 512000000,
    'internal memory': 8000000,
}

# Matrix primitive: operand dimension, operations and completion rate.
settings['Hardware']['matrix premitive'] = dict(
    dimensions=32,
    operations=1,
    completions=1e9/32,
)

# Vector primitive: same fields as the matrix primitive.
settings['Hardware']['vector premitive'] = dict(
    dimensions=32,
    operations=1,
    completions=1e9,
)

In [70]:
# Initial header for the extraction table. The table is reset to an empty list
# before extract_data() is called, so this header never reaches the per-layer loop.
# NOTE(review): rows produced by extract_data() are laid out as
# [position, op type, input dims, dilation, filter dims, stride, output dims],
# which does not line up with these labels - confirm whether the header is still wanted.
rows = [
    [
        "Position",
        "Name",
        "Operation",
        "Inputs",
        "Filter",
        "Stride",
        "Outputs",
    ]
]

In [71]:
########################################################################################################################
# Extracting data per operations
# Conv2D/ Pooling
########################################################################################################################

def extract_data(operation):
    """Collect shape, stride and dilation data for every conv and pooling op.

    Args:
        operation: Sequence of graph operations, e.g. the result of
            ``graph.get_operations()``. Each element must expose ``type``,
            ``node_def``, ``inputs`` and ``outputs``.

    Returns:
        A new list with one entry per ``Conv2D`` / ``DepthwiseConv2dNative`` op
        and per op whose type contains "pool" (case-insensitive). Each entry is
        ``[position, op type, input dims, dilation, filter dims, stride,
        output dims]`` where ``position`` is the 1-based index of the op within
        ``operation``. For pooling ops the filter dims are the ``ksize``
        attribute and the dilation is fixed at ``[1, 1, 1, 1]``.
    """
    # Build a fresh list on every call so repeated calls never stack results.
    extracted = []

    for position, command in enumerate(operation, start=1):
        op_type = command.type
        attrs = command.node_def.attr

        if op_type in ('Conv2D', 'DepthwiseConv2dNative'):
            # Convolutions take the filter tensor as their second input.
            filter_dims = command.inputs[1].get_shape().as_list()
            dilation = list(attrs['dilations'].list.i)
        elif "pool" in op_type.lower():
            # Pooling ops have no filter tensor; the window comes from `ksize`.
            filter_dims = list(attrs['ksize'].list.i)
            dilation = [1, 1, 1, 1]
        else:
            continue

        input_dims = command.inputs[0].get_shape().as_list()
        output_dims = command.outputs[0].get_shape().as_list()
        # Copy the attribute into a plain list so rows hold only Python data.
        stride = list(attrs['strides'].list.i)

        extracted.append(
            [position, op_type, input_dims, dilation, filter_dims, stride, output_dims]
        )

    return extracted

In [72]:
########################################################################################################################
# Graph Model
########################################################################################################################

# Build the network inside a dedicated graph so that the operation list below
# contains only the ops created by the selected model.
with tf.Graph().as_default() as graph:
    inputs = tf.placeholder(tf.float32, shape=[1, 2048, 2048, 3], name='inputs')

    # Exactly one network constructor should be active; swap the comment
    # markers to profile a different model.
    resnet_v1.resnet_v1_50(inputs=inputs)
    # inception_v3.inception_v3(inputs=inputs)
    # vgg.vgg_a(inputs=inputs)
    # mobilenet_v1.mobilenet_v1(inputs=inputs)

# Every operation in the graph; extract_data() filters these down to conv/pool ops.
operations = graph.get_operations()

In [73]:
# Reset the table before extraction so that re-running this cell never stacks
# rows left over from a previous run.
rows = []
rows = extract_data(operations)

In [74]:
# Header row of the report, grouped the same way the per-layer rows are built:
# identity, input side, output side, filter side, compute times, data movement,
# and the serial/parallel totals.
header_groups = (
    ("Name", "#feature Map"),
    ("Internal Memo", "U_r", "U_c", "N_i", "L_r", "L_c", "In Move"),
    ("Out Mem", "S_r", "S_c", "N_o", "M_r", "M_c", "Out Move"),
    ("Cx Mem", "D_r", "D_c", "g", "F_r", "F_c", "Cx Move"),
    ("Compute Matrix Time", "Comput Vector Time", "Total Compute Time"),
    ("band", "Input Data Movement time", "Output Data movement time",
     "Filter Data movement time", "data_movement_time"),
    ("serial time", "Parallel Time"),
)

# The report starts out holding only the flattened header row.
outputs = [[label for group in header_groups for label in group]]

In [75]:
# Pull the hardware parameters used by the per-layer loop out of `settings`.
_hardware = settings['Hardware']
_memory_model = _hardware['memory']

memory = _memory_model['internal memory']
bandwidth = _memory_model['ie-bandwidth']
# The primitive dimension is read from the matrix unit, the completion rate
# from the vector unit.
dimension = _hardware['matrix premitive']['dimensions']
frequency = _hardware['vector premitive']['completions']

In [76]:
########################################################################################################################
# Time initialization
########################################################################################################################

# Network-wide accumulators; the per-layer loop adds to each of these.

# Data movement (input, output, filter, and their combined total).
nets_inputDataMovementTime = nets_outputDataMovementTime = nets_filterDataMovementTime = 0.0
nets_totalDataMovementTime = 0.0

# Compute (matrix unit, vector unit, and their sum).
nets_MatrixComputeTime = nets_VectorComputeTime = 0.0
TotalComputeTime = 0.0

# End-to-end estimates: compute and data movement serialized vs. overlapped.
nets_serialTime = nets_parallelTime = 0.0

In [77]:
########################################################################################################################
# Row iteration and output generation
########################################################################################################################

# Per-layer pass: derive sizes, compute time and data-movement time for every
# extracted row, append one report line to `outputs`, and add the layer's
# contribution to the network-wide accumulators. The accumulators and `outputs`
# are not reset here, so their initialization cells must be re-run before this
# loop is executed again.
for i, row in enumerate(rows):

    #################################################################################
    # Data gathering
    # Row layout: [position, op type, input dims, dilation, filter dims, stride, output dims]
    #################################################################################
    operationType = row[1]
    inputDimensions = row[2]
    dilation = row[3]
    filterDimensions = row[4]
    stride = row[5]
    outputDimensions = row[6]

    opName = operationType.lower()
    isPool = "pool" in opName

    #################################################################################
    # Input data nf, ni, Lr, Lc and input size
    # The input counts as traffic (in_memory = 1) when it is larger than the
    # internal memory, and always for the first layer.
    #################################################################################
    N_f_m = inputDimensions[0]
    L_r = inputDimensions[1]
    L_c = inputDimensions[2]
    N_i = inputDimensions[3]

    numBytesInput = N_i*L_r*L_c

    in_memory = 1 if numBytesInput > memory or i == 0 else 0

    #################################################################################
    # Output data no, Mr, Mc and output size
    # The output counts as traffic (out_memory = 1) when it is larger than the
    # internal memory, and always for the last layer.
    #################################################################################
    M_r = outputDimensions[1]
    M_c = outputDimensions[2]
    N_o = outputDimensions[3]

    numBytesOutput = N_o*M_r*M_c

    out_memory = 1 if numBytesOutput > memory or i == len(rows)-1 else 0

    #################################################################################
    # Stride and filter data calculations
    #################################################################################
    S_r = stride[1]
    S_c = stride[2]

    if isPool:
        # Pooling rows carry `ksize`, indexed like the input dims above, so the
        # window height/width sit at positions 1 and 2. Pooling has no weights.
        F_r = filterDimensions[1]
        F_c = filterDimensions[2]
        numBytesFilter = 0
        filter_memory = 0
    else:
        # Convolution rows carry the filter tensor shape, which TensorFlow lays
        # out as [height, width, in_channels, out_channels].
        F_r = filterDimensions[0]
        F_c = filterDimensions[1]
        numBytesFilter = filterDimensions[0]*filterDimensions[1]*filterDimensions[2]*filterDimensions[3]
        filter_memory = 1

    g = outputDimensions[3] if "depth" in opName else 1

    #################################################################################
    # Dilation calculations
    #################################################################################
    D_r = dilation[1]
    D_c = dilation[2]

    #################################################################################
    # M, N, K, MACS calculations
    #################################################################################
    M = outputDimensions[3]
    K = outputDimensions[1]*outputDimensions[2]
    N = filterDimensions[0]*filterDimensions[1]*filterDimensions[2]

    # NOTE(review): for depthwise convolutions this product is not divided by
    # the group count `g` - confirm whether that is intended.
    MAC = outputDimensions[3]*outputDimensions[1]*outputDimensions[2]*filterDimensions[0]*filterDimensions[1]*filterDimensions[2]

    MAC_Cycle = dimension*dimension*dimension

    # NOTE(review): matrix time is charged to pooling ops as well, on top of
    # their vector time - confirm whether pooling should be vector-only.
    MatrixComputeTime = MAC/(MAC_Cycle*frequency/32)

    VectorComputeTime = 0 if "conv" in opName else MAC/frequency

    ComputeTime = MatrixComputeTime+VectorComputeTime

    #################################################################################
    # Data movement
    # `band` is the total amount moved; each *MT value is a time (amount divided
    # by bandwidth), matching the "... Data Movement time" report columns.
    #################################################################################
    band = numBytesInput*in_memory+numBytesOutput*out_memory+numBytesFilter*filter_memory

    inputMT = numBytesInput*in_memory/bandwidth
    filterMT = numBytesFilter*filter_memory/bandwidth
    outputMT = numBytesOutput*out_memory/bandwidth
    dataMT = band/bandwidth

    # Serial: compute and data movement back to back; parallel: fully overlapped.
    serialT = ComputeTime+dataMT
    parallelT = max(ComputeTime, dataMT)

    #################################################################################
    # Output Data generation.
    #################################################################################
    output = [
        operationType, N_f_m,
        in_memory, 1, 1, N_i, L_r, L_c, numBytesInput,
        out_memory, S_r, S_c, N_o, M_r, M_c, numBytesOutput,
        filter_memory, D_r, D_c, g, F_r, F_c, numBytesFilter,
        MatrixComputeTime, VectorComputeTime, ComputeTime,
        band, inputMT, outputMT, filterMT, dataMT,
        serialT, parallelT
    ]

    outputs.append(output)

    #################################################################################
    # Network-wide totals
    #################################################################################
    nets_inputDataMovementTime += inputMT
    nets_outputDataMovementTime += outputMT
    nets_filterDataMovementTime += filterMT
    nets_totalDataMovementTime += dataMT

    nets_MatrixComputeTime += MatrixComputeTime
    nets_VectorComputeTime += VectorComputeTime
    TotalComputeTime += ComputeTime
    nets_serialTime += serialT
    nets_parallelTime += parallelT

In [78]:
#################################################################################
# Output data append
################################################################################

# Totals row: blank cells under every per-layer column that has no
# network-wide total, followed by the accumulated times.
output = [
        "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "","","",
]

# Compute totals (matrix, vector, combined).
output += [nets_MatrixComputeTime, nets_VectorComputeTime, TotalComputeTime]

# The "band" column has no total.
output.append("")

# Data movement totals (input, output, filter, combined).
output.extend([
    nets_inputDataMovementTime,
    nets_outputDataMovementTime,
    nets_filterDataMovementTime,
    nets_totalDataMovementTime,
])

# Serial and parallel totals close the row.
output += [nets_serialTime, nets_parallelTime]

outputs.append(output)

In [79]:
#################################################################################
# CSV DATA Write
################################################################################
# Write the header, the per-layer rows and the totals row in one go.
# newline='' leaves line-ending handling to the csv module.
with open("asj170430_output.csv", mode='w', newline='') as csv_file:
    csv.writer(csv_file, delimiter=',').writerows(outputs)