In [1]:
# SETUP
import os
from ipywidgets import interact

import tvm
import nnvm
import nnvm.symbol as sb
import tvm.relay as relay

import numpy as np
import tensorflow as tf

# DEBUG
# Project-local `messages` module; DEBUG(False) presumably switches its debug output off — TODO confirm.
import messages
messages.DEBUG(False)
# Imported for interactive debugging; no call to pdb is visible in this part of the notebook.
import pdb
# Export XDNN_VERBOSE=1 into this process's environment ahead of the xfdnn import that follows.
# NOTE(review): presumably read by the xfdnn tooling to enable verbose logging — confirm.
os.environ['XDNN_VERBOSE'] = "1"

from xfdnn.tools.compile.bin.xfdnn_compiler_tvm import TVMCompiler

# DATA
# data_shape   = (1,3,224,224)

# TVM compiler

# Relative file paths used by the compiler and, later in the notebook,
# by the quantizer ('quantizecfg') and the graph-reconstruction step ('netcfg').
config = dict(
    netcfg="work/tvm_compiler.json",
    weights="work/weights_data.h5",
    quantizecfg="work/tvm_quantizer.json",
)

# The compiler itself only receives the network-config and weights paths.
tvm_compiler = TVMCompiler(weights=config['weights'], netcfg=config['netcfg'])

networkfile None
memory 9
dsp 96
generatefile work/tvm_compiler
fromtensorflow False
weights work/weights_data.h5
bytesperpixels 1
cpulayermustgo True
forceweightsfullyconnected True
verbose False
Namespace(anew=None, approximate=False, banditpre=None, barrier=False, bridges=None, bytesperpixels=1, concatstrategy=None, conv_1x1_s2=False, cpulayermustgo=True, customreplication=None, customtiling=None, cut=None, ddr=256, dedicateddsp=None, dsp=96, fancyreplications=False, fc=False, finalnode=None, forceweights=None, forceweightsfullyconnected=True, fromtensorflow=False, frontendonly=False, generatefile='work/tvm_compiler', initialnode=None, inputcut=None, laodschedule=None, lasttensorbyname=None, leavescalealone=False, loadpickle=None, manasadebugmode=None, manualbatch=False, manualdeconv=False, memory=9, mixmemorystrategy=False, networkfile='None', noconvexity=False, nodynamicscaling=False, noreplication=False, notcaffeanew=False, parallelism=False, parallelismgraphalgorithm='tfs', para

In [2]:
import models_util.model_util as model_util
models = model_util.get_models_dict()

@interact(MODEL=sorted(models.keys()))
def select_model(MODEL):
    """Publish the settings of the selected model as notebook-level globals.

    Called through the ``interact`` decorator with one key of ``models``.
    Prints the selected entry, then copies its fields into the globals
    declared below so that later cells can read them.

    Globals written: framework, model_name, model_path, opt_model_path,
    data_io, add_output_layers, data_shapes (input name -> shape),
    input_name (first key of data_shapes) and input_shape (its shape).
    """
    global framework, model_name, model_path, opt_model_path, data_io,\
        data_shapes, add_output_layers, input_name, input_shape

    entry = models[MODEL]
    print(entry)

    framework = entry['framework']
    model_name = entry['model']
    model_path = entry['model_path']
    opt_model_path = entry['weights_path']
    data_io = entry['io']
    add_output_layers = entry['add_output_layers']

    # Pair every input name with the shape listed at the same position.
    data_shapes = dict(zip(entry['inputs'], entry['input_shapes']))

    # The first input is the one the rest of the notebook works with.
    input_name = list(data_shapes)[0]
    input_shape = data_shapes[input_name]



interactive(children=(Dropdown(description='MODEL', options=('Caffe-GoogLeNet_bvlc_without_lrn', 'Caffe2-GoogL…

In [3]:
# Summarise the globals set by select_model for the chosen model.
print(
    "Framework: {}\n"
    "Model path: {}\n"
    "Optional model path: {}\n"
    "Shapes: {}".format(framework, model_path, opt_model_path, data_shapes)
)

Framework: Tensorflow
Model path: /workspace/MLsuite/models_util/../models/external/TF_SLIM/nets/resnet_v1_50.pb
Optional model path: None
Shapes: {'Placeholder': [1, 224, 224, 3]}


In [4]:
from xfdnn.tools.io import load_model_from_file

# Frontend used to import the model. Only the NNVM path is active in this
# notebook; later cells read both `compute_graph` and `xfgraph`.
frontend = 'NNVM'

if frontend == 'NNVM':
    # load_model_from_file(frontend, framework) returns a loader callable,
    # which is invoked with the model path, the input shapes and the
    # optional second model file.
    compute_graph, params, data_layout = \
        load_model_from_file(frontend, framework)(model_path, 
                                                  data_shapes, 
                                                  opt_model_path)
    xfgraph = tvm_compiler.from_nnvm(compute_graph, params, shapes={}, 
                                     #output_op = "InceptionV1/Logits/AvgPool_0a_7x7/AvgPool",
                     data_layout=data_layout) #from_nnvm output_op
else:
    # Disabled Relay path, kept for reference:
    ###elif frontend == 'Relay':
    ###    mod, params, data_layout = \
    ###        load_model_from_file(frontend, framework)(model_path, data_shapes, 
    ###                                                  opt_model_path)
    ###    xfgraph = tvm_compiler.from_relay(mod, params, 
    ###                                      data_layout=data_layout,
    ###                                      add_output_layers=add_output_layers)
    #
    # Fail loudly instead of silently leaving compute_graph / xfgraph
    # undefined (or stale from an earlier run) for the cells below.
    raise ValueError(
        "Unsupported frontend {!r}: only 'NNVM' is enabled in this notebook".format(frontend)
    )

{
  "nodes": [
    {
      "op": "null", 
      "name": "Placeholder", 
      "attrs": {"shape": "[1, 224, 224, 3]"}, 
      "inputs": []
    }, 
    {
      "op": "pad", 
      "name": "resnet_v1_50/Pad", 
      "attrs": {
        "pad_value": "0", 
        "pad_width": "((0, 0), (3, 3), (3, 3), (0, 0))"
      }, 
      "inputs": [[0, 0, 0]]
    }, 
    {
      "op": "null", 
      "name": "resnet_v1_50/conv1/weights", 
      "attrs": {"shape": "(7, 7, 3, 64)"}, 
      "inputs": []
    }, 
    {
      "op": "conv2d", 
      "name": "resnet_v1_50/conv1/Conv2D", 
      "attrs": {
        "channels": "64", 
        "dilation": "(1, 1)", 
        "kernel_layout": "HWIO", 
        "kernel_size": "(7, 7)", 
        "layout": "NHWC", 
        "padding": "[0, 0]", 
        "strides": "(2, 2)", 
        "use_bias": "False"
      }, 
      "inputs": [[1, 0, 0], [2, 0, 0]]
    }, 
    {
      "op": "null", 
      "name": "resnet_v1_50/conv1/BatchNorm/gamma", 
      "attrs": {"shape": "(64,)"}, 


Axis: {}
**************************************************
* XFDNN GRAPH OPTIMIZATIONS
**************************************************
PHASE 2 START Graph telescoping Merging bias into matmul
PHASE 2 END Graph telescoping Removed # nodes:  0
PHASE 3 START Graph telescoping Merging conv, batchnorm, scale and bias
--Merge conv and bias: resnet_v1_50/logits/Conv2D, broadcast_add16
--Merge conv and batchnorm: resnet_v1_50/conv1/Conv2D, resnet_v1_50/conv1/BatchNorm/FusedBatchNorm
--Merge conv and batchnorm: resnet_v1_50/block1/unit_1/bottleneck_v1/shortcut/Conv2D, resnet_v1_50/block1/unit_1/bottleneck_v1/shortcut/BatchNorm/FusedBatchNorm
--Merge conv and batchnorm: resnet_v1_50/block1/unit_1/bottleneck_v1/conv1/Conv2D, resnet_v1_50/block1/unit_1/bottleneck_v1/conv1/BatchNorm/FusedBatchNorm
--Merge conv and batchnorm: resnet_v1_50/block1/unit_1/bottleneck_v1/conv2/Conv2D, resnet_v1_50/block1/unit_1/bottleneck_v1/conv2/BatchNorm/FusedBatchNorm
--Merge conv and batchnorm: resnet_v1_50/bloc

--Merge conv and scale: resnet_v1_50/conv1/Conv2D, resnet_v1_50/conv1/BatchNorm/FusedBatchNorm_scaling
--Merge conv and scale: resnet_v1_50/block1/unit_1/bottleneck_v1/shortcut/Conv2D, resnet_v1_50/block1/unit_1/bottleneck_v1/shortcut/BatchNorm/FusedBatchNorm_scaling
--Merge conv and scale: resnet_v1_50/block1/unit_1/bottleneck_v1/conv1/Conv2D, resnet_v1_50/block1/unit_1/bottleneck_v1/conv1/BatchNorm/FusedBatchNorm_scaling
--Merge conv and scale: resnet_v1_50/block1/unit_1/bottleneck_v1/conv2/Conv2D, resnet_v1_50/block1/unit_1/bottleneck_v1/conv2/BatchNorm/FusedBatchNorm_scaling
--Merge conv and scale: resnet_v1_50/block1/unit_1/bottleneck_v1/conv3/Conv2D, resnet_v1_50/block1/unit_1/bottleneck_v1/conv3/BatchNorm/FusedBatchNorm_scaling
--Merge conv and scale: resnet_v1_50/block1/unit_2/bottleneck_v1/conv1/Conv2D, resnet_v1_50/block1/unit_2/bottleneck_v1/conv1/BatchNorm/FusedBatchNorm_scaling
--Merge conv and scale: resnet_v1_50/block1/unit_2/bottleneck_v1/conv2/Conv2D, resnet_v1_50/bloc

PHASE 3 LOOP 1 Removed # nodes:  0
PHASE 3 END Graph telescoping Done merging conv, batchnorm, scale and bias
PHASE 4 START Removing unnecessary graph nodes
PHASE 4 END Removed # nodes:  0
PHASE 5 Merging ReLU nonlinearity into preceding node 49
PHASE 6 Merging padding layer into succeeding conv or pool 54


In [7]:
# Ask the xfgraph to write its visualization to 'tvm_graph.png'.
# NOTE(review): this cell's saved execution count is out of sequence with the
# neighbouring cells — re-run the notebook top to bottom to confirm the order.
xfgraph.visualize('tvm_graph.png')

Writing graph visualization to tvm_graph.png


In [5]:
# QUANTIZE

import xfdnn.tools.io as xfdnn_io
from xfdnn.tools.xfgraph.quantization import (
    XfGraphDefaultQuantizer,
    XfGraphAddScalingQuantizer,
)

# Directory handed to the quantizer as its calibration directory.
calibration_directory = '/workspace/MLsuite/notebooks/calibration_directory'

# Build the image-loading callable for the selected model.
# NOTE(review): input_shape[2:4] is only (height, width) when input_shape is
# laid out as NCHW; the shape printed earlier for the TensorFlow model is
# [1, 224, 224, 3] — confirm which value is in scope when this cell runs.
img_io_func = xfdnn_io.load_imgs_from_file(
    data_io,
    input_shape[2:4],
    model_name,
)

# cal_size=15 and data_layout='NCHW' are fixed here; the quantization
# parameters file comes from the shared `config` dictionary.
quantizer = XfGraphDefaultQuantizer(
    quant_file=config["quantizecfg"],
    xfgraph=xfgraph,
    calibration_directory=calibration_directory,
    cal_size=15,
    data_loading_func=img_io_func,
    data_layout='NCHW',
)
quantizer.quantize()

# Save the graph under the name 'xfgraph' once quantization has run.
xfgraph.save('xfgraph')

**************************************************
* BUILD CPU EXECUTION GRAPH
**************************************************
End building XfExecGraph
Layers:  81
['Placeholder', 'resnet_v1_50/conv1/Conv2D', 'resnet_v1_50/pool1/MaxPool', 'resnet_v1_50/block1/unit_1/bottleneck_v1/shortcut/Conv2D', 'resnet_v1_50/block1/unit_1/bottleneck_v1/conv1/Conv2D', 'resnet_v1_50/block1/unit_1/bottleneck_v1/conv2/Conv2D', 'resnet_v1_50/block1/unit_1/bottleneck_v1/conv3/Conv2D', 'broadcast_add0', 'resnet_v1_50/block1/unit_2/bottleneck_v1/conv1/Conv2D', 'resnet_v1_50/block1/unit_2/bottleneck_v1/conv2/Conv2D', 'resnet_v1_50/block1/unit_2/bottleneck_v1/conv3/Conv2D', 'broadcast_add1', 'resnet_v1_50/block1/unit_3/bottleneck_v1/shortcut/MaxPool', 'resnet_v1_50/block1/unit_3/bottleneck_v1/conv1/Conv2D', 'resnet_v1_50/block1/unit_3/bottleneck_v1/conv2/Conv2D', 'resnet_v1_50/block1/unit_3/bottleneck_v1/conv3/Conv2D', 'broadcast_add2', 'resnet_v1_50/block2/unit_1/bottleneck_v1/shortcut/Conv2D', 'resnet_v1

Kernel transposed shape: (1, 1, 64, 256)
Input shape transformed: (?, 56, 56, 64)
Padded input shape: (?, 56, 56, 64)
Res shape: (?, 256, 56, 56)
-----------------------
Op idx: 11, op_name: broadcast_add1
-----------------------
Op idx: 12, op_name: resnet_v1_50/block1/unit_3/bottleneck_v1/shortcut/MaxPool
Pooling layer: 0
x_h: -1, x_w: -1
Init pooling layer: Max
Input shape transformed: (?, 56, 56, 256)
Padded input shape: (?, 56, 56, 256)
Res shape: (?, 256, 28, 28)
-----------------------
Op idx: 13, op_name: resnet_v1_50/block1/unit_3/bottleneck_v1/conv1/Conv2D
['broadcast_add1']
Res shape: (64, 256, 1, 1)
Res shape: (64,)
Init convolution layer
[[None, 256, 56, 56], [64, 256, 1, 1], [64]]
Kernel transposed shape: (1, 1, 256, 64)
Input shape transformed: (?, 56, 56, 256)
Padded input shape: (?, 56, 56, 256)
Kernel transposed shape: (1, 1, 256, 64)
Input shape transformed: (?, 56, 56, 256)
Padded input shape: (?, 56, 56, 256)
Res shape: (?, 64, 56, 56)
-----------------------
Op id

Kernel transposed shape: (1, 1, 128, 512)
Input shape transformed: (?, 14, 14, 128)
Padded input shape: (?, 14, 14, 128)
Kernel transposed shape: (1, 1, 128, 512)
Input shape transformed: (?, 14, 14, 128)
Padded input shape: (?, 14, 14, 128)
Res shape: (?, 512, 14, 14)
-----------------------
Op idx: 34, op_name: broadcast_add6
-----------------------
Op idx: 35, op_name: resnet_v1_50/block3/unit_1/bottleneck_v1/shortcut/Conv2D
['broadcast_add6']
Res shape: (1024, 512, 1, 1)
Res shape: (1024,)
Init convolution layer
[[None, 512, 14, 14], [1024, 512, 1, 1], [1024]]
Kernel transposed shape: (1, 1, 512, 1024)
Input shape transformed: (?, 14, 14, 512)
Padded input shape: (?, 14, 14, 512)
Kernel transposed shape: (1, 1, 512, 1024)
Input shape transformed: (?, 14, 14, 512)
Padded input shape: (?, 14, 14, 512)
Res shape: (?, 1024, 14, 14)
-----------------------
Op idx: 36, op_name: resnet_v1_50/block3/unit_1/bottleneck_v1/conv1/Conv2D
['broadcast_add6']
Res shape: (256, 512, 1, 1)
Res shape:

Res shape: (?, 512, 7, 7)
-----------------------
Op idx: 63, op_name: resnet_v1_50/block4/unit_1/bottleneck_v1/conv2/Conv2D
['resnet_v1_50/block4/unit_1/bottleneck_v1/conv1/Conv2D']
Res shape: (512, 512, 3, 3)
Res shape: (512,)
Init convolution layer
[[None, 512, 7, 7], [512, 512, 3, 3], [512]]
Kernel transposed shape: (3, 3, 512, 512)
Input shape transformed: (?, 7, 7, 512)
Padded input shape: (?, 9, 9, 512)
Kernel transposed shape: (3, 3, 512, 512)
Input shape transformed: (?, 7, 7, 512)
Padded input shape: (?, 9, 9, 512)
Res shape: (?, 512, 7, 7)
-----------------------
Op idx: 64, op_name: resnet_v1_50/block4/unit_1/bottleneck_v1/conv3/Conv2D
['resnet_v1_50/block4/unit_1/bottleneck_v1/conv2/Conv2D']
Res shape: (2048, 512, 1, 1)
Res shape: (2048,)
Init convolution layer
[[None, 512, 7, 7], [2048, 512, 1, 1], [2048]]
Kernel transposed shape: (1, 1, 512, 2048)
Input shape transformed: (?, 7, 7, 512)
Padded input shape: (?, 7, 7, 512)
Kernel transposed shape: (1, 1, 512, 2048)
Input s

-----------------------
Layer idx: 39, layer name: resnet_v1_50/block1/unit_3/bottleneck_v1/conv3/Conv2D
Layer inputs: ['resnet_v1_50/block1/unit_3/bottleneck_v1/conv2/Conv2D', 'resnet_v1_50/block1/unit_3/bottleneck_v1/conv3/Conv2D_kernel', 'resnet_v1_50/block1/unit_3/bottleneck_v1/conv3/Conv2D_biases']
Kernel transposed shape: (1, 1, 64, 256)
Input shape transformed: (?, 28, 28, 64)
Padded input shape: (?, 28, 28, 64)
-----------------------
Layer idx: 40, layer name: broadcast_add2
Layer inputs: ['resnet_v1_50/block1/unit_3/bottleneck_v1/shortcut/MaxPool', 'resnet_v1_50/block1/unit_3/bottleneck_v1/conv3/Conv2D']
-----------------------
Layer idx: 41, layer name: broadcast_add2
Layer inputs: ['broadcast_add2']
-----------------------
Layer idx: 42, layer name: resnet_v1_50/block2/unit_1/bottleneck_v1/shortcut/Conv2D_kernel
Layer inputs: ['resnet_v1_50/block2/unit_1/bottleneck_v1/shortcut/Conv2D_kernel']
-----------------------
Layer idx: 43, layer name: resnet_v1_50/block2/unit_1/bott

-----------------------
Layer idx: 88, layer name: broadcast_add6
Layer inputs: ['resnet_v1_50/block2/unit_4/bottleneck_v1/shortcut/MaxPool', 'resnet_v1_50/block2/unit_4/bottleneck_v1/conv3/Conv2D']
-----------------------
Layer idx: 89, layer name: broadcast_add6
Layer inputs: ['broadcast_add6']
-----------------------
Layer idx: 90, layer name: resnet_v1_50/block3/unit_1/bottleneck_v1/shortcut/Conv2D_kernel
Layer inputs: ['resnet_v1_50/block3/unit_1/bottleneck_v1/shortcut/Conv2D_kernel']
-----------------------
Layer idx: 91, layer name: resnet_v1_50/block3/unit_1/bottleneck_v1/shortcut/Conv2D_biases
Layer inputs: ['resnet_v1_50/block3/unit_1/bottleneck_v1/shortcut/Conv2D_biases']
-----------------------
Layer idx: 92, layer name: resnet_v1_50/block3/unit_1/bottleneck_v1/shortcut/Conv2D
Layer inputs: ['broadcast_add6', 'resnet_v1_50/block3/unit_1/bottleneck_v1/shortcut/Conv2D_kernel', 'resnet_v1_50/block3/unit_1/bottleneck_v1/shortcut/Conv2D_biases']
Kernel transposed shape: (1, 1, 5

-----------------------
Layer idx: 140, layer name: resnet_v1_50/block3/unit_5/bottleneck_v1/conv2/Conv2D_kernel
Layer inputs: ['resnet_v1_50/block3/unit_5/bottleneck_v1/conv2/Conv2D_kernel']
-----------------------
Layer idx: 141, layer name: resnet_v1_50/block3/unit_5/bottleneck_v1/conv2/Conv2D_biases
Layer inputs: ['resnet_v1_50/block3/unit_5/bottleneck_v1/conv2/Conv2D_biases']
-----------------------
Layer idx: 142, layer name: resnet_v1_50/block3/unit_5/bottleneck_v1/conv2/Conv2D
Layer inputs: ['resnet_v1_50/block3/unit_5/bottleneck_v1/conv1/Conv2D', 'resnet_v1_50/block3/unit_5/bottleneck_v1/conv2/Conv2D_kernel', 'resnet_v1_50/block3/unit_5/bottleneck_v1/conv2/Conv2D_biases']
Kernel transposed shape: (3, 3, 256, 256)
Input shape transformed: (?, 14, 14, 256)
Padded input shape: (?, 16, 16, 256)
-----------------------
Layer idx: 143, layer name: resnet_v1_50/block3/unit_5/bottleneck_v1/conv3/Conv2D_kernel
Layer inputs: ['resnet_v1_50/block3/unit_5/bottleneck_v1/conv3/Conv2D_kernel

Input shape transformed: (?, 7, 7, 512)
Padded input shape: (?, 9, 9, 512)
-----------------------
Layer idx: 191, layer name: resnet_v1_50/block4/unit_3/bottleneck_v1/conv3/Conv2D_kernel
Layer inputs: ['resnet_v1_50/block4/unit_3/bottleneck_v1/conv3/Conv2D_kernel']
-----------------------
Layer idx: 192, layer name: resnet_v1_50/block4/unit_3/bottleneck_v1/conv3/Conv2D_biases
Layer inputs: ['resnet_v1_50/block4/unit_3/bottleneck_v1/conv3/Conv2D_biases']
-----------------------
Layer idx: 193, layer name: resnet_v1_50/block4/unit_3/bottleneck_v1/conv3/Conv2D
Layer inputs: ['resnet_v1_50/block4/unit_3/bottleneck_v1/conv2/Conv2D', 'resnet_v1_50/block4/unit_3/bottleneck_v1/conv3/Conv2D_kernel', 'resnet_v1_50/block4/unit_3/bottleneck_v1/conv3/Conv2D_biases']
Kernel transposed shape: (1, 1, 512, 2048)
Input shape transformed: (?, 7, 7, 512)
Padded input shape: (?, 7, 7, 512)
-----------------------
Layer idx: 194, layer name: broadcast_add15
Layer inputs: ['broadcast_add14', 'resnet_v1_50/b

-----------------------
Operation idx: 13, name: resnet_v1_50/block1/unit_1/bottleneck_v1/conv2/Conv2D, type: Convolution
Operation inputs shape: [(15, 64, 56, 56), (64, 64, 3, 3), (64,)], output shape: (15, 64, 56, 56)

Quantize convolution layer: resnet_v1_50/block1/unit_1/bottleneck_v1/conv2/Conv2D
Found input names: resnet_v1_50/block1/unit_1/bottleneck_v1/conv1/Conv2D
Input (n,c,h,w) = (15, 64, 56, 56), Min: 0.0, Max: 12.39815902709961, Stdev: 0.9799368381500244
Weights (outchan,inchan,h,w) = (64, 64, 3, 3), Min: -0.2624363601207733, Max: 0.30505114793777466, Stdev: 0.02575431764125824
Min:  0 , Max:  12.170306
n:  128 , len(bin_edges):  1737
Zero in KL q
Mean : th_layer_out:  11.339556617396218 , sf_layer_out:  0.08928784738107258
11.339556617396218
Output (n,c,h,w) = (15, 64, 56, 56), Min: 0.0, Max: 12.170306205749512, Stdev: 1.2776541709899902
-----------------------
Operation idx: 14, name: resnet_v1_50/block1/unit_1/bottleneck_v1/conv3/Conv2D_kernel, type: Input
Operation inp

Threshold in: [12.298130889338672]
Input left shape: (15, 256, 56, 56), right shape:(15, 256, 56, 56),  Min: -14.857864379882812, Max: 16.171632766723633, Stdev: 1.092832088470459
Min:  0 , Max:  18.272707
n:  128 , len(bin_edges):  3472
Zero in KL q
Mean : th_layer_out:  8.03083103034861 , sf_layer_out:  0.06323489000274496
Threshold out: 8.03083103034861
Output (n,c,h,w) = (15, 256, 56, 56), Min: -14.857864379882812, Max: 18.272706985473633, Stdev: 1.5226346254348755
-----------------------
Operation idx: 29, name: broadcast_add1, type: Relu
Operation inputs shape: [(15, 256, 56, 56)], output shape: (15, 256, 56, 56)
-----------------------
Operation idx: 30, name: resnet_v1_50/block1/unit_3/bottleneck_v1/shortcut/MaxPool, type: Pooling
Operation inputs shape: [(15, 256, 56, 56)], output shape: (15, 256, 28, 28)
Quantize max pooling layer: resnet_v1_50/block1/unit_3/bottleneck_v1/shortcut/MaxPool
Input (n,c,h,w) = ((15, 256, 56, 56)), Min: 0.0, Max: 18.272706985473633, Stdev: 1.12063

Zero in KL q
Mean : th_layer_out:  8.388059601709914 , sf_layer_out:  0.06604771339929066
8.388059601709914
Output (n,c,h,w) = (15, 512, 28, 28), Min: -15.267704963684082, Max: 17.783411026000977, Stdev: 1.5429998636245728
-----------------------
Operation idx: 45, name: resnet_v1_50/block2/unit_1/bottleneck_v1/conv1/Conv2D_kernel, type: Input
Operation inputs shape: [(128, 256, 1, 1)], output shape: (128, 256, 1, 1)

Quantize input layer: resnet_v1_50/block2/unit_1/bottleneck_v1/conv1/Conv2D_kernel
Output (n,c,h,w) = (128, 256, 1, 1), Min: -0.4573066234588623, Max: 0.3087187111377716, Stdev: 0.036902740597724915
-----------------------
Operation idx: 46, name: resnet_v1_50/block2/unit_1/bottleneck_v1/conv1/Conv2D_biases, type: Input
Operation inputs shape: [(128,)], output shape: (128,)

Quantize input layer: resnet_v1_50/block2/unit_1/bottleneck_v1/conv1/Conv2D_biases
Output (n,c,h,w) = (128,), Min: -3.2372756004333496, Max: 3.2022011280059814, Stdev: 1.0157651901245117
-------------

-----------------------
Operation idx: 61, name: resnet_v1_50/block2/unit_2/bottleneck_v1/conv2/Conv2D, type: Convolution
Operation inputs shape: [(15, 128, 28, 28), (128, 128, 3, 3), (128,)], output shape: (15, 128, 28, 28)

Quantize convolution layer: resnet_v1_50/block2/unit_2/bottleneck_v1/conv2/Conv2D
Found input names: resnet_v1_50/block2/unit_2/bottleneck_v1/conv1/Conv2D
Input (n,c,h,w) = (15, 128, 28, 28), Min: 0.0, Max: 9.185839653015137, Stdev: 0.5311100482940674
Weights (outchan,inchan,h,w) = (128, 128, 3, 3), Min: -0.3206142485141754, Max: 0.41105714440345764, Stdev: 0.03912122920155525
Min:  0 , Max:  11.243227
n:  128 , len(bin_edges):  1228
Zero in KL q
Mean : th_layer_out:  6.693706053101929 , sf_layer_out:  0.05270634687481834
6.693706053101929
Output (n,c,h,w) = (15, 128, 28, 28), Min: 0.0, Max: 11.243227005004883, Stdev: 0.5783504843711853
-----------------------
Operation idx: 62, name: resnet_v1_50/block2/unit_2/bottleneck_v1/conv3/Conv2D_kernel, type: Input
Operat

Min:  0 , Max:  27.291246
n:  128 , len(bin_edges):  2455
Zero in KL q
Mean : th_layer_out:  10.370451214843975 , sf_layer_out:  0.08165709617987382
Threshold out: 10.370451214843975
Output (n,c,h,w) = (15, 512, 28, 28), Min: -27.29124641418457, Max: 20.605316162109375, Stdev: 1.4843072891235352
-----------------------
Operation idx: 77, name: broadcast_add5, type: Relu
Operation inputs shape: [(15, 512, 28, 28)], output shape: (15, 512, 28, 28)
-----------------------
Operation idx: 78, name: resnet_v1_50/block2/unit_4/bottleneck_v1/shortcut/MaxPool, type: Pooling
Operation inputs shape: [(15, 512, 28, 28)], output shape: (15, 512, 14, 14)
Quantize max pooling layer: resnet_v1_50/block2/unit_4/bottleneck_v1/shortcut/MaxPool
Input (n,c,h,w) = ((15, 512, 28, 28)), Min: 0.0, Max: 20.605316162109375, Stdev: 1.0865304470062256
Output (n,c,h,w) = ((15, 512, 14, 14)), Min: 0.0, Max: 20.605316162109375, Stdev: 1.096451759338379
-----------------------
Operation idx: 79, name: resnet_v1_50/blo

Input (n,c,h,w) = (15, 512, 14, 14), Min: 0.0, Max: 20.87090492248535, Stdev: 1.2279324531555176
Weights (outchan,inchan,h,w) = (256, 512, 1, 1), Min: -0.2518995404243469, Max: 0.49932432174682617, Stdev: 0.023159272968769073
Min:  0 , Max:  18.438625
n:  128 , len(bin_edges):  869
Zero in KL q
Mean : th_layer_out:  6.978212468635102 , sf_layer_out:  0.05494655487114254
6.978212468635102
Output (n,c,h,w) = (15, 256, 14, 14), Min: 0.0, Max: 18.43862533569336, Stdev: 0.6676626801490784
-----------------------
Operation idx: 96, name: resnet_v1_50/block3/unit_1/bottleneck_v1/conv2/Conv2D_kernel, type: Input
Operation inputs shape: [(256, 256, 3, 3)], output shape: (256, 256, 3, 3)

Quantize input layer: resnet_v1_50/block3/unit_1/bottleneck_v1/conv2/Conv2D_kernel
Output (n,c,h,w) = (256, 256, 3, 3), Min: -0.21205735206604004, Max: 0.26809847354888916, Stdev: 0.01665567234158516
-----------------------
Operation idx: 97, name: resnet_v1_50/block3/unit_1/bottleneck_v1/conv2/Conv2D_biases, t

-----------------------
Operation idx: 112, name: resnet_v1_50/block3/unit_2/bottleneck_v1/conv3/Conv2D, type: Convolution
Operation inputs shape: [(15, 256, 14, 14), (1024, 256, 1, 1), (1024,)], output shape: (15, 1024, 14, 14)

Quantize convolution layer: resnet_v1_50/block3/unit_2/bottleneck_v1/conv3/Conv2D
Found input names: resnet_v1_50/block3/unit_2/bottleneck_v1/conv2/Conv2D
Input (n,c,h,w) = (15, 256, 14, 14), Min: 0.0, Max: 36.604740142822266, Stdev: 0.4266788959503174
Weights (outchan,inchan,h,w) = (1024, 256, 1, 1), Min: -1.8831462860107422, Max: 1.5195558071136475, Stdev: 0.09225223958492279
Min:  0 , Max:  22.145569
n:  128 , len(bin_edges):  1737
Zero in KL q
Mean : th_layer_out:  6.499520350161785 , sf_layer_out:  0.05117732559182508
6.499520350161785
Output (n,c,h,w) = (15, 1024, 14, 14), Min: -22.14556884765625, Max: 15.3539457321167, Stdev: 0.7783132791519165
-----------------------
Operation idx: 113, name: broadcast_add8, type: Eltwise
Operation inputs shape: [(15, 

-----------------------
Operation idx: 128, name: resnet_v1_50/block3/unit_4/bottleneck_v1/conv1/Conv2D, type: Convolution
Operation inputs shape: [(15, 1024, 14, 14), (256, 1024, 1, 1), (256,)], output shape: (15, 256, 14, 14)

Quantize convolution layer: resnet_v1_50/block3/unit_4/bottleneck_v1/conv1/Conv2D
Found input names: broadcast_add9
Input (n,c,h,w) = (15, 1024, 14, 14), Min: 0.0, Max: 20.268369674682617, Stdev: 0.9803948402404785
Weights (outchan,inchan,h,w) = (256, 1024, 1, 1), Min: -0.1613612025976181, Max: 0.40676558017730713, Stdev: 0.01820891536772251
Min:  0 , Max:  23.198912
n:  128 , len(bin_edges):  869
Zero in KL q
Mean : th_layer_out:  6.828711901941607 , sf_layer_out:  0.05376938505465832
6.828711901941607
Output (n,c,h,w) = (15, 256, 14, 14), Min: 0.0, Max: 23.198911666870117, Stdev: 0.3699401617050171
-----------------------
Operation idx: 129, name: resnet_v1_50/block3/unit_4/bottleneck_v1/conv2/Conv2D_kernel, type: Input
Operation inputs shape: [(256, 256, 3, 

-----------------------
Operation idx: 145, name: resnet_v1_50/block3/unit_5/bottleneck_v1/conv3/Conv2D, type: Convolution
Operation inputs shape: [(15, 256, 14, 14), (1024, 256, 1, 1), (1024,)], output shape: (15, 1024, 14, 14)

Quantize convolution layer: resnet_v1_50/block3/unit_5/bottleneck_v1/conv3/Conv2D
Found input names: resnet_v1_50/block3/unit_5/bottleneck_v1/conv2/Conv2D
Input (n,c,h,w) = (15, 256, 14, 14), Min: 0.0, Max: 36.11723709106445, Stdev: 0.5825785398483276
Weights (outchan,inchan,h,w) = (1024, 256, 1, 1), Min: -0.8410162329673767, Max: 1.285105586051941, Stdev: 0.07283701002597809
Min:  0 , Max:  27.228315
n:  128 , len(bin_edges):  1737
Zero in KL q
Mean : th_layer_out:  7.48935517352847 , sf_layer_out:  0.058971300578964325
7.48935517352847
Output (n,c,h,w) = (15, 1024, 14, 14), Min: -27.228315353393555, Max: 15.625455856323242, Stdev: 0.9183263778686523
-----------------------
Operation idx: 146, name: broadcast_add11, type: Eltwise
Operation inputs shape: [(15,

-----------------------
Operation idx: 162, name: resnet_v1_50/block4/unit_1/bottleneck_v1/shortcut/Conv2D, type: Convolution
Operation inputs shape: [(15, 1024, 7, 7), (2048, 1024, 1, 1), (2048,)], output shape: (15, 2048, 7, 7)

Quantize convolution layer: resnet_v1_50/block4/unit_1/bottleneck_v1/shortcut/Conv2D
Found input names: broadcast_add12
Input (n,c,h,w) = (15, 1024, 7, 7), Min: 0.0, Max: 20.344371795654297, Stdev: 1.0052059888839722
Weights (outchan,inchan,h,w) = (2048, 1024, 1, 1), Min: -2.8222451210021973, Max: 2.668454170227051, Stdev: 0.06410454958677292
Min:  0 , Max:  45.082287
n:  128 , len(bin_edges):  1228
Zero in KL q
Mean : th_layer_out:  18.719987891025433 , sf_layer_out:  0.14740147945689316
18.719987891025433
Output (n,c,h,w) = (15, 2048, 7, 7), Min: -18.70884895324707, Max: 45.0822868347168, Stdev: 2.606299877166748
-----------------------
Operation idx: 163, name: resnet_v1_50/block4/unit_1/bottleneck_v1/conv1/Conv2D_kernel, type: Input
Operation inputs shape

-----------------------
Operation idx: 179, name: resnet_v1_50/block4/unit_2/bottleneck_v1/conv2/Conv2D, type: Convolution
Operation inputs shape: [(15, 512, 7, 7), (512, 512, 3, 3), (512,)], output shape: (15, 512, 7, 7)

Quantize convolution layer: resnet_v1_50/block4/unit_2/bottleneck_v1/conv2/Conv2D
Found input names: resnet_v1_50/block4/unit_2/bottleneck_v1/conv1/Conv2D
Input (n,c,h,w) = (15, 512, 7, 7), Min: 0.0, Max: 7.386351108551025, Stdev: 0.3258780539035797
Weights (outchan,inchan,h,w) = (512, 512, 3, 3), Min: -0.4828679859638214, Max: 0.8266786336898804, Stdev: 0.027438243851065636
Min:  0 , Max:  5.6512294
n:  128 , len(bin_edges):  615
Zero in KL q
Mean : th_layer_out:  4.717027781840644 , sf_layer_out:  0.0371419510381153
4.717027781840644
Output (n,c,h,w) = (15, 512, 7, 7), Min: 0.0, Max: 5.651229381561279, Stdev: 0.29222020506858826
-----------------------
Operation idx: 180, name: resnet_v1_50/block4/unit_2/bottleneck_v1/conv3/Conv2D_kernel, type: Input
Operation inpu

Zero in KL q
Mean : th_layer_out:  23.76640323244078 , sf_layer_out:  0.18713703332630535
Threshold out: 23.76640323244078
Output (n,c,h,w) = (15, 2048, 7, 7), Min: -19.42112159729004, Max: 62.78014373779297, Stdev: 2.7255702018737793
-----------------------
Operation idx: 195, name: broadcast_add15, type: Relu
Operation inputs shape: [(15, 2048, 7, 7)], output shape: (15, 2048, 7, 7)
-----------------------
Operation idx: 196, name: resnet_v1_50/pool5, type: Mean
Operation inputs shape: [(15, 2048, 7, 7)], output shape: (15, 2048, 1, 1)
-----------------------
Operation idx: 197, name: resnet_v1_50/logits/Conv2D_kernel, type: Input
Operation inputs shape: [(1000, 2048, 1, 1)], output shape: (1000, 2048, 1, 1)

Quantize input layer: resnet_v1_50/logits/Conv2D_kernel
Output (n,c,h,w) = (1000, 2048, 1, 1), Min: -0.2103586047887802, Max: 0.7361589670181274, Stdev: 0.033539678901433945
-----------------------
Operation idx: 198, name: resnet_v1_50/logits/Conv2D_biases, type: Input
Operatio

In [6]:
# COMPILE
# Hand the xfgraph built (and quantized) in the earlier cells to the
# TVMCompiler instance created in the setup cell.
tvm_compiler.compile(xfgraph)

DSP: 96
DSP V3
Memory: 9
DDR: 256
**************************************************
* HARDWARE 
**************************************************

##########
The brains presents: 
DDR The_Master_of_the_brains
	 self.alignment         64
	 self.FREE              {(0, 268435456): MemoryAllocation(start=0, end=268435456, size=268435456, extra=[], strategy=[], layout=-1, timestamp=-1, slice=-1, shapes=None, replication=Replication(full_sect_num=0, repl_sect_num=0, repl_unit_num=0, repl_unit_width=0, channels_division=0), written=False, specifier='', IO=False)}
	 self.size              268435456
	 self.timestamp         0
	 self.slice             -1
	 self.bytesperpixels    1
	 self.strategies_       ['bysize', 'bottom', 'top', 'tops', 'bottle', 'bottles', 'xXx', 'shuffle', 'flip']
	 self.rules              rule3
	 self.frequency           300000000.0
	 self.efficiency          0.9
	 self.batches             4
	 self.dsp 96
DDR INPUT  NONE
DDR OUTPUT NONE
DSP small pinky
	 self.slice     

**************************************************
* Graph weight manipulation
**************************************************

**************************************************
* Concat Alignment verification to mod 8             
**************************************************
**************************************************
* CPU Layer will be REMOVED
**************************************************
**************************************************
* WRITING WEIGHTS FC
**************************************************
Weight HDF5: work/weights_data.h5
Weight Directory: work/weights_data.h5

Done writing FC weights.
* CPU Layer schedule

{1} -|-0 name Placeholder type Input fpga True bottoms [] [Extras ['resnet_v1_50/Pad']]-  Past [] -> Future []
{1} -|-1 name resnet_v1_50/conv1/Conv2D type Convolution fpga True bottoms ['Placeholder'] [Extras ['resnet_v1_50/conv1/BatchNorm/FusedBatchNorm', 'resnet_v1_50/conv1/BatchNorm/FusedBatchNorm_scaling', 'resnet_v1_50/conv1/Relu']

NO FPGA resnet_v1_50/pool5 Layers 82 Time 82
NO FPGA resnet_v1_50/logits/Conv2D Layers 81 Time 82
NO FPGA resnet_v1_50/SpatialSqueeze Layers 80 Time 81
NO FPGA resnet_v1_50/predictions/Reshape Layers 79 Time 80
NO FPGA flatten0 Layers 78 Time 79
NO FPGA resnet_v1_50/predictions/Softmax Layers 77 Time 78
NO FPGA resnet_v1_50/predictions/Reshape_1 Layers 76 Time 77

Every time we say goodbye to CPU Layer

* resnet_v1_50/pool5 
* resnet_v1_50/logits/Conv2D 
* resnet_v1_50/SpatialSqueeze 
* resnet_v1_50/predictions/Reshape 
* flatten0 
* resnet_v1_50/predictions/Softmax 
* resnet_v1_50/predictions/Reshape_1 
* Without CPU Layer schedule

{1} -|-0 name Placeholder type Input fpga True bottoms [] [Extras ['resnet_v1_50/Pad']]-  Past [] -> Future []
{1} -|-1 name resnet_v1_50/conv1/Conv2D type Convolution fpga True bottoms ['Placeholder'] [Extras ['resnet_v1_50/conv1/BatchNorm/FusedBatchNorm', 'resnet_v1_50/conv1/BatchNorm/FusedBatchNorm_scaling', 'resnet_v1_50/conv1/Relu']]-  Past ['resnet_v

Minimum Memory __________
1 ['resnet_v1_50/conv1/Conv2D'] size:6021120 remap:[] data movement:[]
1	Placeholder_blob M[0,4816896] Z=4816896 F=[1] B=[0] E=[] S=['layer'] [] L=-1 T=SizeType(batches=-1, channels=3, height=224, width=224)
1	resnet_v1_50/conv1/Conv2D_blob M[0,1204224] Z=1204224 F=[2] B=[1] E=[] S=['layer'] [] L=-1 T=SizeType(batches=-1, channels=64, height=112, width=112)
MAX  1
TOP 5
__________
1 ['resnet_v1_50/conv1/Conv2D'] size:6021120 remap:[] data movement:[]
1	Placeholder_blob M[0,4816896] Z=4816896 F=[1] B=[0] E=[] S=['layer'] [] L=-1 T=SizeType(batches=-1, channels=3, height=224, width=224)
1	resnet_v1_50/conv1/Conv2D_blob M[0,1204224] Z=1204224 F=[2] B=[1] E=[] S=['layer'] [] L=-1 T=SizeType(batches=-1, channels=64, height=112, width=112)
__________
0 ['Placeholder'] size:4816896 remap:[] data movement:[]
0	Placeholder_blob M[0,4816896] Z=4816896 F=[1] B=[0] E=[] S=['layer'] [] L=-1 T=SizeType(batches=-1, channels=3, height=224, width=224)
__________
6 ['resnet_v1_

 broadcast_add8 Wait(Wait_Download=1, Wait_Upload=1, Wait_Conv=1, Wait_Pool=1, Wait_EW=1, Wait_Upsmpl=1, ParalleRead=0) AM Partition [0 3145728 6291456 9437184]  
  IN 9117312-9324288 
  IN 8853888-9060864
NOT PARALLEL ELTREAD >
 broadcast_add9 Wait(Wait_Download=1, Wait_Upload=1, Wait_Conv=1, Wait_Pool=1, Wait_EW=1, Wait_Upsmpl=1, ParalleRead=0) AM Partition [0 3145728 6291456 9437184]  
  IN 9117312-9324288 
  IN 8853888-9060864
NOT PARALLEL ELTREAD >
 broadcast_add10 Wait(Wait_Download=1, Wait_Upload=1, Wait_Conv=1, Wait_Pool=1, Wait_EW=1, Wait_Upsmpl=1, ParalleRead=0) AM Partition [0 3145728 6291456 9437184]  
  IN 9117312-9324288 
  IN 8853888-9060864
NOT PARALLEL ELTREAD >
 broadcast_add11 Wait(Wait_Download=1, Wait_Upload=1, Wait_Conv=1, Wait_Pool=1, Wait_EW=1, Wait_Upsmpl=1, ParalleRead=0) AM Partition [0 3145728 6291456 9437184]  
  IN 9117312-9324288 
  IN 8853888-9060864
NOT PARALLEL ELTREAD >
 broadcast_add12 Wait(Wait_Download=1, Wait_Upload=1, Wait_Conv=1, Wait_Pool=1, Wa

Processing weights for 75 schedule steps: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
Done writing weights.
SUCCESS True


In [None]:
# (TESTING) BUILD FOR INTERNAL XDNN EXECUTION
# from xfdnn.tools.xfgraph.xfgraph import XfGraph

##xfgraph = XfGraph()
##xfgraph.load('xfgraph.json', 'xfgraph.h5')
## 
#xfgraph.build(device='sim', quantcfg=config["quantizecfg"])

# xfgraph.build(device='cpu')

In [5]:
# PREPARING THE INPUT
# CHOOSE AN IMAGE TO RUN, DISPLAY IT FOR REFERENCE
import xfdnn.tools.io as xfdnn_io
import numpy as np
import cv2

from matplotlib import pyplot as plt
#%matplotlib inline

# Load the ImageNet validation map: one entry per line, split on single spaces
# (the label is read from field index 1 further down in the notebook).
with open('/workspace/MLsuite/notebooks/imagenet-val/val_map.txt') as f:
    imagenet_val_set = [line.strip('\n').split(' ') for line in f]

# NEXT TWO VARIABLES NEED TO BE ADJUSTED TO TRY OUT OTHER INPUTS
val_images = ["/workspace/MLsuite/examples/image_classify/sample_images/dog.jpg"]
input_shape = (1,3,224,224)

# cv2.imread does not raise on a missing/unreadable file; it returns None,
# which would otherwise surface as an opaque assertion inside cvtColor.
img = cv2.imread(val_images[0])
if img is None:
    raise IOError("Could not read image file: {}".format(val_images[0]))
# OpenCV loads images as BGR; matplotlib expects RGB.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img)
plt.title(val_images[0])
plt.show()

# Pre-allocate the float32 batch buffer; assigning into it below forces the
# loaded data to match `input_shape` (or fail loudly on a shape mismatch).
batch_array = np.empty(input_shape, dtype=np.float32, order='C')
img_paths = val_images

# Loader built from the model's I/O settings and the spatial dims of input_shape.
img_io_func = xfdnn_io.load_imgs_from_file(data_io, input_shape[2:4], model_name)

data = img_io_func(img_paths)
batch_array[:] = data
print(batch_array.shape)

# numpy print option
np.set_printoptions(precision=4, suppress=True)

# SIM
# TODO only one input so this is working
inputs = {}
inputs[input_name] = batch_array # Placeholder / data / 0

<Figure size 640x480 with 1 Axes>

(1, 3, 224, 224)


In [None]:
# (TESTING) RUN ON CPU FOR TESTING PURPOSES
#res = xfgraph.run(inputs, #['InceptionV1/Logits/SpatialSqueeze'],
#                  batch_size=1)

#print(res[0].shape)
#print(repr(res[0]))
#print(np.max(res[0]))

In [6]:
# RECONSTRUCT AND FUSE THE GRAPH FOR XDNN
import contrib_xdnn
from graph import graph_reconst
# Node index of the original compute graph; its nodes drive the reconstruction.
gidx = compute_graph.index

print("--debug: start reconstructing the graph")
graph = graph_reconst(config["netcfg"], gidx.nodes, add_output_layers)

print("--debug: finished reconstructing the graph")


# SETUP AND COMPILE THE RECONSTRUCTED NNVM GRAPH

target = 'llvm'
target_host = 'llvm'
input_type = 'float32'

# Shape / dtype of every parameter tensor, keyed by parameter name.
params_shapes = {name: params[name].shape for name in params}
params_dtypes = {name: params[name].dtype for name in params}

# The network input goes in first; parameter entries are merged afterwards
# (and therefore win if a parameter happens to share the input's name).
shape_dict = {input_name: input_shape}
shape_dict.update(params_shapes)
dtype_dict = {input_name: input_type}
dtype_dict.update(params_dtypes)

print(shape_dict)
# NOTE: this rebinds `graph` and `params` to the build results.
graph, lib, params = nnvm.compiler.build(
    graph, target, shape_dict, dtype_dict,
    params=params, target_host=target_host)


print("--debug: finished recompiling NNVM graph")



--debug: start reconstructing the graph
--debug: finished reconstructing the graph
{'Placeholder': (1, 3, 224, 224), 'resnet_v1_50/conv1/weights': (7, 7, 3, 64), 'resnet_v1_50/conv1/BatchNorm/gamma': (64,), 'resnet_v1_50/conv1/BatchNorm/beta': (64,), 'resnet_v1_50/conv1/BatchNorm/moving_mean': (64,), 'resnet_v1_50/conv1/BatchNorm/moving_variance': (64,), 'resnet_v1_50/block1/unit_1/bottleneck_v1/shortcut/weights': (1, 1, 64, 256), 'resnet_v1_50/block1/unit_1/bottleneck_v1/shortcut/BatchNorm/gamma': (256,), 'resnet_v1_50/block1/unit_1/bottleneck_v1/shortcut/BatchNorm/beta': (256,), 'resnet_v1_50/block1/unit_1/bottleneck_v1/shortcut/BatchNorm/moving_mean': (256,), 'resnet_v1_50/block1/unit_1/bottleneck_v1/shortcut/BatchNorm/moving_variance': (256,), 'resnet_v1_50/block1/unit_1/bottleneck_v1/conv1/weights': (1, 1, 64, 64), 'resnet_v1_50/block1/unit_1/bottleneck_v1/conv1/BatchNorm/gamma': (64,), 'resnet_v1_50/block1/unit_1/bottleneck_v1/conv1/BatchNorm/beta': (64,), 'resnet_v1_50/block1/un

--debug: finished recompiling NNVM graph


In [1]:
# RUN THE GRAPH
from tvm.contrib import graph_runtime
# Instantiate a graph runtime module for the compiled graph on CPU device 0.
ctx = tvm.cpu(0)
m = graph_runtime.create(graph, lib, ctx)

# Feed the network input(s) first, then the parameters (same order as before).
for feed in (inputs, params):
    m.set_input(**feed)

# RUN
m.run()

# First output of the executed graph.
tvm_output = m.get_output(0)

NameError: name 'tvm' is not defined

In [None]:
# PERFORM PREDICTION
import xfdnn.tools.xfgraph.classification as xfdnn_classification

# TODO: Make this more automatic: 1000 <-> 1001
def predict(tensor):
    """Print top-1 and top-5 accuracy for a batch of raw class scores.

    The second dimension of ``tensor`` selects the label convention:
    1000 classes use the labels from ``imagenet_val_set`` as-is together with
    'synset_words.txt'; 1001 classes (e.g. inception) use the integer label
    plus one together with 'synset_words_1001.txt'.

    Ground-truth labels are taken from the first ``tensor.shape[0]`` entries
    of ``imagenet_val_set``.
    NOTE(review): this presumes the batch corresponds to those leading
    validation entries - confirm against the images actually fed in.

    Args:
        tensor: array-like with a ``shape`` of at least two dimensions,
            indexed as (batch, classes).

    Raises:
        ValueError: if the class dimension is neither 1000 nor 1001.
    """
    num_classes = tensor.shape[1]
    if num_classes not in (1000, 1001):
        raise ValueError("Unknown number of predicted categories: {}".format(num_classes))

    ground_truth = imagenet_val_set[:tensor.shape[0]]
    if num_classes == 1000:
        synset_words = 'synset_words.txt'
        label_lst = [entry[1] for entry in ground_truth]
    else:
        # for inception, ...
        synset_words = 'synset_words_1001.txt'
        label_lst = [int(entry[1]) + 1 for entry in ground_truth]

    # Evaluate both metrics before printing anything, in the order k=1 then k=5.
    top_1, top_5 = [
        xfdnn_classification.get_top_k_accuracy(tensor, synset_words, k, label_lst)
        for k in (1, 5)
    ]
    print("Top 1: {}".format(top_1))
    print("Top 5: {}".format(top_5))


# Report top-1 / top-5 accuracy for the graph output produced by the run cell
# (`asnumpy()` presumably converts the TVM array to a NumPy array - confirm).
predict(tvm_output.asnumpy())