# AWS Inferentia inference on Amazon EC2 Inf1 instance
This example demonstrates AWS Inferentia inference with TensorFlow and AWS Neuron SDK compiler and runtime

This example was tested on Amazon EC2 `inf1.xlarge` the following AWS Deep Learning AMI: 
`Deep Learning AMI (Ubuntu 18.04) Version 35.0`

Run this notebook using the following conda environment:
`aws_neuron_tensorflow_p36`

Prepare your imagenet validation TFRecord files using the following helper script:
https://github.com/tensorflow/models/blob/archive/research/inception/inception/data/download_and_preprocess_imagenet.sh

Save it to `/home/ubuntu/datasets/` or update the dataset location in the `get_dataset()` function

In [1]:
# !pip install matplotlib pandas

In [1]:
!/opt/aws/neuron/bin/neuron-cli reset
import os
import time
import shutil
import json
import requests
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.neuron as tfn
import tensorflow.compat.v1.keras as keras
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing import image
from concurrent import futures
from itertools import compress

print('test')

test


In [2]:
# https://github.com/tensorflow/tensorflow/issues/29931
temp = tf.zeros([8, 224, 224, 3])
_ = tf.keras.applications.vgg16.preprocess_input(temp)

### Resnet50 FP32 saved model

In [3]:
# Export SavedModel
model_type = 'mobilenet'

saved_model_dir = f'{model_type}_saved_model'
shutil.rmtree(saved_model_dir, ignore_errors=True)

keras.backend.set_learning_phase(0)
model = VGG16(weights='imagenet')
tf.saved_model.simple_save(session = keras.backend.get_session(),
                           export_dir = saved_model_dir,
                           inputs = {'input_1:0': model.inputs[0]},
                           outputs = {'probs/Softmax:0': model.outputs[0]})

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.simple_save.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: mobilenet_saved_model/saved_model.pb


### Compile models with different batch sizes and cores

In [5]:
def compile_inf1_model(saved_model_dir, inf1_model_dir, batch_size=1, num_cores=1, use_static_weights=False):
    print(f'-----------batch size: {batch_size}, num cores: {num_cores}----------')
    print('Compiling...')
    
    compiled_model_dir = f'{model_type}_batch_{batch_size}_inf1_cores_{num_cores}'
    inf1_compiled_model_dir = os.path.join(inf1_model_dir, compiled_model_dir)
    shutil.rmtree(inf1_compiled_model_dir, ignore_errors=True)

    example_input = np.zeros([batch_size,224,224,3], dtype='float32')

    compiler_args = ['--verbose','1', '--num-neuroncores', str(num_cores)]
    if use_static_weights:
        compiler_args.append('--static-weights')
    
    start_time = time.time()
    compiled_res = tfn.saved_model.compile(model_dir = saved_model_dir,
                            model_feed_dict={'input_1:0': example_input},
                            new_model_dir = inf1_compiled_model_dir,
                            dynamic_batch_size=True,
                            compiler_args = compiler_args)
    print(f'Compile time: {time.time() - start_time}')
    
    compile_success = False
    perc_on_inf = compiled_res['OnNeuronRatio'] * 100
    if perc_on_inf > 50:
        compile_success = True
            
    print(inf1_compiled_model_dir)
    print(compiled_res)
    print('----------- Done! ----------- \n')
    
    return compile_success

### Use `tf.data` to read ImageNet validation dataset

In [6]:
def deserialize_image_record(record):
    feature_map = {'image/encoded': tf.io.FixedLenFeature([], tf.string, ''),
                  'image/class/label': tf.io.FixedLenFeature([1], tf.int64, -1),
                  'image/class/text': tf.io.FixedLenFeature([], tf.string, '')}
    obj = tf.io.parse_single_example(serialized=record, features=feature_map)
    imgdata = obj['image/encoded']
    label = tf.cast(obj['image/class/label'], tf.int32)   
    label_text = tf.cast(obj['image/class/text'], tf.string)   
    return imgdata, label, label_text

def val_preprocessing(record):
    imgdata, label, label_text = deserialize_image_record(record)
    label -= 1
    image = tf.io.decode_jpeg(imgdata, channels=3, 
                              fancy_upscaling=False, 
                              dct_method='INTEGER_FAST')

    shape = tf.shape(image)
    height = tf.cast(shape[0], tf.float32)
    width = tf.cast(shape[1], tf.float32)
    side = tf.cast(tf.convert_to_tensor(256, dtype=tf.int32), tf.float32)

    scale = tf.cond(tf.greater(height, width),
                  lambda: side / width,
                  lambda: side / height)
    
    new_height = tf.cast(tf.math.rint(height * scale), tf.int32)
    new_width = tf.cast(tf.math.rint(width * scale), tf.int32)
    
    image = tf.image.resize(image, [new_height, new_width], method='bicubic')
    image = tf.image.resize_with_crop_or_pad(image, 224, 224)
    
    image = tf.keras.applications.vgg16.preprocess_input(image)
    
    return image, label, label_text

def get_dataset(batch_size, use_cache=False):
    data_dir = '/home/ubuntu/datasets/*'
    files = tf.io.gfile.glob(os.path.join(data_dir))
    dataset = tf.data.TFRecordDataset(files)
    
    dataset = dataset.map(map_func=val_preprocessing, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.batch(batch_size=batch_size)
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    dataset = dataset.repeat(count=1)
    
    if use_cache:
        shutil.rmtree('tfdatacache', ignore_errors=True)
        os.mkdir('tfdatacache')
        dataset = dataset.cache(f'./tfdatacache/imagenet_val')
    
    return dataset

## Single AWS Inferentia chip execution
* Single core compiled models with automatic data parallel model upto 4 cores
* Multi-core compiled models for pipeline execution

In [15]:
def inf1_predict_benchmark_single_threaded(neuron_saved_model_name, batch_size, user_batch_size, num_cores, use_cache=False, warm_up=10):
    print(f'Running model {neuron_saved_model_name}, user_batch_size: {user_batch_size}\n')

    model_inf1 = tf.contrib.predictor.from_saved_model(neuron_saved_model_name)

    iter_times = []
    pred_labels = []
    actual_labels = []
    display_threshold = 0
    warm_up = 10

    ds = get_dataset(user_batch_size, use_cache)

    ds_iter = ds.make_initializable_iterator()
    ds_next = ds_iter.get_next()
    ds_init_op = ds_iter.initializer

    with tf.Session() as sess:
        if use_cache:
            sess.run(ds_init_op)
            print('\nCaching dataset ...')
            start_time = time.time()
            try:
                while True:
                    (validation_ds,label,_) = sess.run(ds_next)
            except tf.errors.OutOfRangeError:
                pass
            print(f'Caching finished: {time.time()-start_time} sec')  

        try:
            sess.run(ds_init_op)
            counter = 0
            
            total_datas = 1000
            display_every = 100
            display_threshold = display_every
            
            ipname = list(model_inf1.feed_tensors.keys())[0]
            resname = list(model_inf1.fetch_tensors.keys())[0]
            
            walltime_start = time.time()
            sess_time = []
            extend_time = []
            while True:
                sess_start = time.time()
                (validation_ds,batch_labels,_) = sess.run(ds_next)
                sess_time.append(time.time() - sess_start)
                
                model_feed_dict={ipname: validation_ds}

                if counter == 0:
                    for i in range(warm_up):
                        _ = model_inf1(model_feed_dict);                    

                start_time = time.time()
                inf1_results = model_inf1(model_feed_dict);
                iter_times.append(time.time() - start_time)
                
                extend_start = time.time()
                actual_labels.extend(label for label_list in batch_labels for label in label_list)
                pred_labels.extend(list(np.argmax(inf1_results[resname], axis=1)))
                extend_time.append(time.time() - extend_start)
                
                if counter*user_batch_size >= display_threshold:
                    print(f'Images {counter*user_batch_size}/{total_datas}. Average i/s {np.mean(user_batch_size/np.array(iter_times[-display_every:]))}')
                    display_threshold+=display_every

                counter+=1

        except tf.errors.OutOfRangeError:
            pass
    
    labeling_start = time.time()
    
    acc_inf1 = np.sum(np.array(actual_labels) == np.array(pred_labels))/len(actual_labels)
    iter_times = np.array(iter_times)
    labeling_time = time.time() - labeling_start
    
    results = pd.DataFrame(columns = [f'inf1_compiled_batch_size_{batch_size}_compiled_cores_{num_cores}'])
    results.loc['compiled_batch_size']     = [batch_size]
    results.loc['user_batch_size']         = [user_batch_size]
    results.loc['accuracy']                = [acc_inf1]
    results.loc['prediction_time']         = [np.sum(iter_times)]
    results.loc['sess_time']               = [np.sum(np.array(sess_time))]
    results.loc['extend_time']             = [np.sum(np.array(extend_time))]
    results.loc['labeling_time']           = [np.sum(np.array(labeling_time))]
    results.loc['wall_time']               = [time.time() - walltime_start]
    results.loc['images_per_sec_mean']     = [np.mean(user_batch_size / iter_times)]
    results.loc['images_per_sec_std']      = [np.std(user_batch_size / iter_times, ddof=1)]
    results.loc['latency_mean']            = [np.mean(iter_times) * 1000]
    results.loc['latency_99th_percentile'] = [np.percentile(iter_times, q=99, interpolation="lower") * 1000]
    results.loc['latency_median']          = [np.median(iter_times) * 1000]
    results.loc['latency_min']             = [np.min(iter_times) * 1000]
    display(results.T)
    shutil.rmtree(neuron_saved_model_name, ignore_errors=True)

    return results, iter_times

In [16]:
inf1_model_dir = f'{model_type}_inf1_saved_models'
saved_model_dir = f'{model_type}_saved_model'


# testing batch size
batch_list = [1]
num_of_cores = [1]
for batch in batch_list:
    for core in num_of_cores:
        print('batch size:', batch,'core nums', core,'compile start')
        compile_inf1_model(saved_model_dir, inf1_model_dir, batch_size=batch, num_cores=core)

inf1_model_dir = f'{model_type}_inf1_saved_models'

for batch_size in batch_list:
    iter_ds = pd.DataFrame()
    results = pd.DataFrame()
    for num_cores in num_of_cores:
        opt ={'batch_size': batch_size, 'num_cores': num_of_cores}
        compiled_model_dir = f'{model_type}_batch_{batch_size}_inf1_cores_{num_cores}'
        inf1_compiled_model_dir = os.path.join(inf1_model_dir, compiled_model_dir)

        print(f'inf1_compiled_model_dir: {inf1_compiled_model_dir}')
        col_name = lambda opt: f'inf1_{batch_size}_multicores_{num_cores}'

        res, iter_times = inf1_predict_benchmark_single_threaded(inf1_compiled_model_dir,
                                                                         batch_size = batch_size,
                                                                         user_batch_size = batch_size*10,
                                                                         num_cores = num_cores,
                                                                         use_cache=False, 
                                                                         warm_up=10)

        iter_ds = pd.concat([iter_ds, pd.DataFrame(iter_times, columns=[col_name(opt)])], axis=1)
        results = pd.concat([results, res], axis=1)

    display(results)

batch size: 1 core nums 1 compile start
-----------batch size: 1, num cores: 1----------
Compiling...
INFO:tensorflow:Restoring parameters from resnet50_saved_model/variables/variables
INFO:tensorflow:Froze 320 variables.
INFO:tensorflow:Converted 320 variables to const ops.
INFO:tensorflow:Number of operations in TensorFlow session: 4647
INFO:tensorflow:Number of operations after tf.neuron optimizations: 876
INFO:tensorflow:Number of operations placed on Neuron runtime: 874
INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1/saved_model.pb
INFO:tensorflow:Successfully converted resnet50_saved_model to resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1
Compile time: 60.27557849884033
resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1
{'OnNeuronRatio': 0.997716894977169}
----------- Done! ----------- 

inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50

Unnamed: 0,test_time,compiled_batch_size,user_batch_size,accuracy,prediction_time,sess_time,extend_time,labeling_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_1_compiled_cores_1,0.001829,1.0,10.0,0.919,2.214002,0.825889,0.006285,0.004369,5.135721,464.01119,75.397037,22.140017,30.942917,21.709323,15.019894


Unnamed: 0,inf1_compiled_batch_size_1_compiled_cores_1
test_time,0.001829
compiled_batch_size,1.0
user_batch_size,10.0
accuracy,0.919
prediction_time,2.214002
sess_time,0.825889
extend_time,0.006285
labeling_time,0.004369
wall_time,5.135721
images_per_sec_mean,464.01119


In [8]:
inf1_model_dir = 'resnet50_inf1_saved_models'
saved_model_dir = 'resnet50_saved_model'


# testing batch size
batch_list = [1,2,4,8]
num_of_cores = [1,2,3,4]
for batch in batch_list:
    for core in num_of_cores:
        print('batch size:', batch,'core nums', core,'compile start')
        compile_inf1_model(saved_model_dir, inf1_model_dir, batch_size=batch, num_cores=core)

inf1_model_dir = 'resnet50_inf1_saved_models'

for batch_size in batch_list:
    iter_ds = pd.DataFrame()
    results = pd.DataFrame()
    for num_cores in num_of_cores:
        opt ={'batch_size': batch_size, 'num_cores': num_of_cores}
        compiled_model_dir = f'resnet50_batch_{batch_size}_inf1_cores_{num_cores}'
        inf1_compiled_model_dir = os.path.join(inf1_model_dir, compiled_model_dir)

        print(f'inf1_compiled_model_dir: {inf1_compiled_model_dir}')
        col_name = lambda opt: f'inf1_{batch_size}_multicores_{num_cores}'

        res, iter_times = inf1_predict_benchmark_single_threaded(inf1_compiled_model_dir,
                                                                         batch_size = batch_size,
                                                                         user_batch_size = batch_size*10,
                                                                         num_cores = num_cores,
                                                                         use_cache=False, 
                                                                         warm_up=10)

        iter_ds = pd.concat([iter_ds, pd.DataFrame(iter_times, columns=[col_name(opt)])], axis=1)
        results = pd.concat([results, res], axis=1)

    display(results)

batch size: 1 core nums 1 compile start
-----------batch size: 1, num cores: 1----------
Compiling...
INFO:tensorflow:Restoring parameters from resnet50_saved_model/variables/variables
INFO:tensorflow:Froze 320 variables.
INFO:tensorflow:Converted 320 variables to const ops.
INFO:tensorflow:fusing subgraph {subgraph neuron_op_d6f098c01c780733 with input tensors ["<tf.Tensor 'input_10/_0:0' shape=(1, 224, 224, 3) dtype=float32>"], output tensors ["<tf.Tensor 'probs/Softmax:0' shape=(1, 1000) dtype=float32>"]} with neuron-cc; you may check progress by inspecting file /home/ubuntu/ai-accelerators-examples/compiler-workdir/resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_1/neuron_op_d6f098c01c780733/graph_def.neuron-cc.log
INFO:tensorflow:Number of operations in TensorFlow session: 4647
INFO:tensorflow:Number of operations after tf.neuron optimizations: 876
INFO:tensorflow:Number of operations placed on Neuron runtime: 874
INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets 

Compile time: 74.6214702129364
resnet50_inf1_saved_models/resnet50_batch_2_inf1_cores_2
{'OnNeuronRatio': 0.997716894977169}
----------- Done! ----------- 

batch size: 2 core nums 3 compile start
-----------batch size: 2, num cores: 3----------
Compiling...
INFO:tensorflow:Restoring parameters from resnet50_saved_model/variables/variables
INFO:tensorflow:Froze 320 variables.
INFO:tensorflow:Converted 320 variables to const ops.
INFO:tensorflow:fusing subgraph {subgraph neuron_op_d6f098c01c780733 with input tensors ["<tf.Tensor 'input_10/_0:0' shape=(2, 224, 224, 3) dtype=float32>"], output tensors ["<tf.Tensor 'probs/Softmax:0' shape=(2, 1000) dtype=float32>"]} with neuron-cc; you may check progress by inspecting file /home/ubuntu/ai-accelerators-examples/compiler-workdir/resnet50_inf1_saved_models/resnet50_batch_2_inf1_cores_3/neuron_op_d6f098c01c780733/graph_def.neuron-cc.log
INFO:tensorflow:Number of operations in TensorFlow session: 4647
INFO:tensorflow:Number of operations after 

INFO:tensorflow:Successfully converted resnet50_saved_model to resnet50_inf1_saved_models/resnet50_batch_4_inf1_cores_4
Compile time: 90.48108291625977
resnet50_inf1_saved_models/resnet50_batch_4_inf1_cores_4
{'OnNeuronRatio': 0.997716894977169}
----------- Done! ----------- 

batch size: 8 core nums 1 compile start
-----------batch size: 8, num cores: 1----------
Compiling...
INFO:tensorflow:Restoring parameters from resnet50_saved_model/variables/variables
INFO:tensorflow:Froze 320 variables.
INFO:tensorflow:Converted 320 variables to const ops.
INFO:tensorflow:fusing subgraph {subgraph neuron_op_d6f098c01c780733 with input tensors ["<tf.Tensor 'input_10/_0:0' shape=(8, 224, 224, 3) dtype=float32>"], output tensors ["<tf.Tensor 'probs/Softmax:0' shape=(8, 1000) dtype=float32>"]} with neuron-cc; you may check progress by inspecting file /home/ubuntu/ai-accelerators-examples/compiler-workdir/resnet50_inf1_saved_models/resnet50_batch_8_inf1_cores_1/neuron_op_d6f098c01c780733/graph_def.n

Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_1_compiled_cores_1,1,10,0.919,2.07635,5.27607,494.531,77.4011,20.7635,32.0146,20.314,15.1401


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_2
Running model resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_2, user_batch_size: 10

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 100/1000. Average i/s 378.29816884027076
Images 200/1000. Average i/s 369.1599504482385
Images 300/1000. Average i/s 364.3336871014851
Images 400/1000. Average i/s 365.26393398818345
Images 500/1000. Average i/s 368.1493914234497
Images 600/1000. Average i/s 368.84109898393206
Images 700/1000. Average i/s 371.6954325409117
Images 800/1000. Average i/s 371.23428869284004
Images 900/1000. Average i/s 368.69090112795544


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_1_compiled_cores_2,1,10,0.917,2.73537,4.17314,368.7,33.5295,27.3537,34.198,26.9749,23.1512


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_3
Running model resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_3, user_batch_size: 10

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 100/1000. Average i/s 358.47489498428735
Images 200/1000. Average i/s 362.35928027580985
Images 300/1000. Average i/s 362.83082080391307
Images 400/1000. Average i/s 363.0468475760616
Images 500/1000. Average i/s 364.24753589474057
Images 600/1000. Average i/s 363.30958170896406
Images 700/1000. Average i/s 362.89392006955904
Images 800/1000. Average i/s 365.6958459589164
Images 900/1000. Average i/s 364.11055119644897


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_1_compiled_cores_3,1,10,0.918,2.7586,4.27854,365.255,31.8413,27.586,34.0261,27.7255,22.9688


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_4
Running model resnet50_inf1_saved_models/resnet50_batch_1_inf1_cores_4, user_batch_size: 10

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 100/1000. Average i/s 370.7846131981328
Images 200/1000. Average i/s 372.1315479996698
Images 300/1000. Average i/s 378.2393905782272
Images 400/1000. Average i/s 381.8605803215907
Images 500/1000. Average i/s 377.2933310045669
Images 600/1000. Average i/s 374.8127918566922
Images 700/1000. Average i/s 375.0691541549424
Images 800/1000. Average i/s 375.9088029877493
Images 900/1000. Average i/s 373.7426509683528


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_1_compiled_cores_4,1,10,0.916,2.71408,4.23677,373.914,44.7429,27.1408,34.3733,27.0547,21.3037


Unnamed: 0,inf1_compiled_batch_size_1_compiled_cores_1,inf1_compiled_batch_size_1_compiled_cores_2,inf1_compiled_batch_size_1_compiled_cores_3,inf1_compiled_batch_size_1_compiled_cores_4
compiled_batch_size,1.0,1.0,1.0,1.0
user_batch_size,10.0,10.0,10.0,10.0
accuracy,0.919,0.917,0.918,0.916
prediction_time,2.07635,2.73537,2.7586,2.71408
wall_time,5.27607,4.17314,4.27854,4.23677
images_per_sec_mean,494.531,368.7,365.255,373.914
images_per_sec_std,77.4011,33.5295,31.8413,44.7429
latency_mean,20.7635,27.3537,27.586,27.1408
latency_99th_percentile,32.0146,34.198,34.0261,34.3733
latency_median,20.314,26.9749,27.7255,27.0547


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_2_inf1_cores_1
Running model resnet50_inf1_saved_models/resnet50_batch_2_inf1_cores_1, user_batch_size: 20

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 100/1000. Average i/s 704.9655184505747
Images 200/1000. Average i/s 686.8056661861427
Images 300/1000. Average i/s 695.15434797939
Images 400/1000. Average i/s 703.7266995970158
Images 500/1000. Average i/s 709.8235837069883
Images 600/1000. Average i/s 713.3601965301773
Images 700/1000. Average i/s 714.115214334095
Images 800/1000. Average i/s 712.6002997133128
Images 900/1000. Average i/s 709.7351064163612


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_2_compiled_cores_1,2,20,0.919,1.42135,5.05111,712.862,82.7052,28.4271,32.937,28.7254,21.2288


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_2_inf1_cores_2
Running model resnet50_inf1_saved_models/resnet50_batch_2_inf1_cores_2, user_batch_size: 20

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 100/1000. Average i/s 587.3956000029093
Images 200/1000. Average i/s 577.9303497486462
Images 300/1000. Average i/s 578.1600927328019
Images 400/1000. Average i/s 581.3878972030352
Images 500/1000. Average i/s 586.658929962759
Images 600/1000. Average i/s 587.2832423133121
Images 700/1000. Average i/s 588.7559762777782
Images 800/1000. Average i/s 586.4661435595976
Images 900/1000. Average i/s 584.5084193150299


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_2_compiled_cores_2,2,20,0.919,1.71493,3.99327,586.367,43.4715,34.2987,41.1391,34.3143,30.2856


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_2_inf1_cores_3
Running model resnet50_inf1_saved_models/resnet50_batch_2_inf1_cores_3, user_batch_size: 20

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 100/1000. Average i/s 481.41549257789774
Images 200/1000. Average i/s 486.2574685430165
Images 300/1000. Average i/s 477.360774292507
Images 400/1000. Average i/s 473.9759646142611
Images 500/1000. Average i/s 468.40943944410776
Images 600/1000. Average i/s 464.8227287109459
Images 700/1000. Average i/s 464.02450561381494
Images 800/1000. Average i/s 463.9664544059976
Images 900/1000. Average i/s 465.04986742783126


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_2_compiled_cores_3,2,20,0.918,2.15949,4.42366,465.17,31.161,43.1897,49.2129,42.944,38.5716


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_2_inf1_cores_4
Running model resnet50_inf1_saved_models/resnet50_batch_2_inf1_cores_4, user_batch_size: 20

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 100/1000. Average i/s 468.68929932223597
Images 200/1000. Average i/s 475.2261011762757
Images 300/1000. Average i/s 477.8019369796924
Images 400/1000. Average i/s 481.1525716533794
Images 500/1000. Average i/s 485.29842410235665
Images 600/1000. Average i/s 486.7659299617728
Images 700/1000. Average i/s 489.9298502326245
Images 800/1000. Average i/s 489.7023274494549
Images 900/1000. Average i/s 490.4832238601535


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_2_compiled_cores_4,2,20,0.918,2.05547,4.3961,488.587,31.3678,41.1094,47.4737,40.5638,37.0972


Unnamed: 0,inf1_compiled_batch_size_2_compiled_cores_1,inf1_compiled_batch_size_2_compiled_cores_2,inf1_compiled_batch_size_2_compiled_cores_3,inf1_compiled_batch_size_2_compiled_cores_4
compiled_batch_size,2.0,2.0,2.0,2.0
user_batch_size,20.0,20.0,20.0,20.0
accuracy,0.919,0.919,0.918,0.918
prediction_time,1.42135,1.71493,2.15949,2.05547
wall_time,5.05111,3.99327,4.42366,4.3961
images_per_sec_mean,712.862,586.367,465.17,488.587
images_per_sec_std,82.7052,43.4715,31.161,31.3678
latency_mean,28.4271,34.2987,43.1897,41.1094
latency_99th_percentile,32.937,41.1391,49.2129,47.4737
latency_median,28.7254,34.3143,42.944,40.5638


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_4_inf1_cores_1
Running model resnet50_inf1_saved_models/resnet50_batch_4_inf1_cores_1, user_batch_size: 40

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 120/1000. Average i/s 946.6046300538176
Images 200/1000. Average i/s 928.8118184201385
Images 320/1000. Average i/s 941.5954056262001
Images 400/1000. Average i/s 948.360849633917
Images 520/1000. Average i/s 931.4544730418747
Images 600/1000. Average i/s 934.6779984906277
Images 720/1000. Average i/s 925.5655037991526
Images 800/1000. Average i/s 929.927462820811
Images 920/1000. Average i/s 929.3154418784488


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_4_compiled_cores_1,4,40,0.918,1.07485,5.31716,937.07,82.434,42.9941,48.456,44.0359,35.6128


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_4_inf1_cores_2
Running model resnet50_inf1_saved_models/resnet50_batch_4_inf1_cores_2, user_batch_size: 40

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 120/1000. Average i/s 655.6942118144061
Images 200/1000. Average i/s 649.3826719512305
Images 320/1000. Average i/s 643.895209841024
Images 400/1000. Average i/s 643.1007866662293
Images 520/1000. Average i/s 649.0126953542184
Images 600/1000. Average i/s 652.2704970058389
Images 720/1000. Average i/s 652.6767713348651
Images 800/1000. Average i/s 651.8760150059696
Images 920/1000. Average i/s 648.0215226084445


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_4_compiled_cores_2,4,40,0.92,1.54225,4.27291,649.45,26.3584,61.69,66.4597,61.2352,58.1112


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_4_inf1_cores_3
Running model resnet50_inf1_saved_models/resnet50_batch_4_inf1_cores_3, user_batch_size: 40

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 120/1000. Average i/s 525.6273196791157
Images 200/1000. Average i/s 530.2912493721417
Images 320/1000. Average i/s 525.0158565954534
Images 400/1000. Average i/s 522.5375155304824
Images 520/1000. Average i/s 523.198863060952
Images 600/1000. Average i/s 525.5774159413406
Images 720/1000. Average i/s 526.7405021003067
Images 800/1000. Average i/s 526.3344439535192
Images 920/1000. Average i/s 527.9970483452138


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_4_compiled_cores_3,4,40,0.917,1.89271,4.73493,529.108,20.3744,75.7084,80.1184,75.8688,71.6991


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_4_inf1_cores_4
Running model resnet50_inf1_saved_models/resnet50_batch_4_inf1_cores_4, user_batch_size: 40

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 120/1000. Average i/s 528.7652878352374
Images 200/1000. Average i/s 532.4414967036586
Images 320/1000. Average i/s 535.6884920815359
Images 400/1000. Average i/s 540.1050133848522
Images 520/1000. Average i/s 541.8040704391058
Images 600/1000. Average i/s 541.861244534631
Images 720/1000. Average i/s 544.6038553290156
Images 800/1000. Average i/s 543.8359555098248
Images 920/1000. Average i/s 545.6803524086135


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_4_compiled_cores_4,4,40,0.917,1.83033,4.75168,546.894,17.4719,73.2132,78.0282,73.0152,69.4427


Unnamed: 0,inf1_compiled_batch_size_4_compiled_cores_1,inf1_compiled_batch_size_4_compiled_cores_2,inf1_compiled_batch_size_4_compiled_cores_3,inf1_compiled_batch_size_4_compiled_cores_4
compiled_batch_size,4.0,4.0,4.0,4.0
user_batch_size,40.0,40.0,40.0,40.0
accuracy,0.918,0.92,0.917,0.917
prediction_time,1.07485,1.54225,1.89271,1.83033
wall_time,5.31716,4.27291,4.73493,4.75168
images_per_sec_mean,937.07,649.45,529.108,546.894
images_per_sec_std,82.434,26.3584,20.3744,17.4719
latency_mean,42.9941,61.69,75.7084,73.2132
latency_99th_percentile,48.456,66.4597,80.1184,78.0282
latency_median,44.0359,61.2352,75.8688,73.0152


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_8_inf1_cores_1
Running model resnet50_inf1_saved_models/resnet50_batch_8_inf1_cores_1, user_batch_size: 80

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 160/1000. Average i/s 972.7132523386575
Images 240/1000. Average i/s 988.1887296695729
Images 320/1000. Average i/s 999.381915259985
Images 400/1000. Average i/s 989.6220012091071
Images 560/1000. Average i/s 994.5083761405009
Images 640/1000. Average i/s 1005.5151734877836
Images 720/1000. Average i/s 983.2168738920936
Images 800/1000. Average i/s 980.4302078923048
Images 960/1000. Average i/s 1038.3875391989666


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_8_compiled_cores_1,8,80,0.919,1.03221,6.29515,1038.39,222.841,79.4006,89.0439,79.7191,46.1669


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_8_inf1_cores_2
Running model resnet50_inf1_saved_models/resnet50_batch_8_inf1_cores_2, user_batch_size: 80

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 160/1000. Average i/s 661.8002004373762
Images 240/1000. Average i/s 664.052546131378
Images 320/1000. Average i/s 662.1026663181403
Images 400/1000. Average i/s 658.7880000157932
Images 560/1000. Average i/s 657.9348567560796
Images 640/1000. Average i/s 657.3248047982984
Images 720/1000. Average i/s 654.6893530383979
Images 800/1000. Average i/s 655.1087700924744
Images 960/1000. Average i/s 699.367091481284


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_8_compiled_cores_2,8,80,0.918,1.52993,5.06771,699.367,157.889,117.687,124.569,121.824,65.3834


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_8_inf1_cores_3
Running model resnet50_inf1_saved_models/resnet50_batch_8_inf1_cores_3, user_batch_size: 80

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 160/1000. Average i/s 537.6320857666816
Images 240/1000. Average i/s 535.8976836735848
Images 320/1000. Average i/s 536.2153060889356
Images 400/1000. Average i/s 537.2152522193093
Images 560/1000. Average i/s 537.3878572960961
Images 640/1000. Average i/s 538.1326753884833
Images 720/1000. Average i/s 538.1115012158859
Images 800/1000. Average i/s 538.5050242491548
Images 960/1000. Average i/s 574.6230314553595


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_8_compiled_cores_3,8,80,0.919,1.86282,5.68175,574.623,131.114,143.294,150.746,147.969,79.1731


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_8_inf1_cores_4
Running model resnet50_inf1_saved_models/resnet50_batch_8_inf1_cores_4, user_batch_size: 80

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 160/1000. Average i/s 560.4063241463874
Images 240/1000. Average i/s 562.2173111384687
Images 320/1000. Average i/s 558.6475267002336
Images 400/1000. Average i/s 560.0959960185596
Images 560/1000. Average i/s 559.4843542563256
Images 640/1000. Average i/s 559.7306240902705
Images 720/1000. Average i/s 558.1406696502743
Images 800/1000. Average i/s 558.5604917931731
Images 960/1000. Average i/s 595.1590565076076


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_8_compiled_cores_4,8,80,0.918,1.79815,5.65712,595.159,134.973,138.319,146.959,142.429,76.6749


Unnamed: 0,inf1_compiled_batch_size_8_compiled_cores_1,inf1_compiled_batch_size_8_compiled_cores_2,inf1_compiled_batch_size_8_compiled_cores_3,inf1_compiled_batch_size_8_compiled_cores_4
compiled_batch_size,8.0,8.0,8.0,8.0
user_batch_size,80.0,80.0,80.0,80.0
accuracy,0.919,0.918,0.919,0.918
prediction_time,1.03221,1.52993,1.86282,1.79815
wall_time,6.29515,5.06771,5.68175,5.65712
images_per_sec_mean,1038.39,699.367,574.623,595.159
images_per_sec_std,222.841,157.889,131.114,134.973
latency_mean,79.4006,117.687,143.294,138.319
latency_99th_percentile,89.0439,124.569,150.746,146.959
latency_median,79.7191,121.824,147.969,142.429


In [None]:
inf1_model_dir = 'resnet50_inf1_saved_models'
saved_model_dir = 'resnet50_saved_model'


# testing batch size
batch_list = [16,32,64]
num_of_cores = [1,2,3,4]
for batch in batch_list:
    for core in num_of_cores:
        print('batch size:', batch,'core nums', core,'compile start')
        compile_inf1_model(saved_model_dir, inf1_model_dir, batch_size=batch, num_cores=core)

inf1_model_dir = 'resnet50_inf1_saved_models'

for batch_size in batch_list:
    iter_ds = pd.DataFrame()
    results = pd.DataFrame()
    for num_cores in num_of_cores:
        opt ={'batch_size': batch_size, 'num_cores': num_of_cores}
        compiled_model_dir = f'resnet50_batch_{batch_size}_inf1_cores_{num_cores}'
        inf1_compiled_model_dir = os.path.join(inf1_model_dir, compiled_model_dir)

        print(f'inf1_compiled_model_dir: {inf1_compiled_model_dir}')
        col_name = lambda opt: f'inf1_{batch_size}_multicores_{num_cores}'

        res, iter_times = inf1_predict_benchmark_single_threaded(inf1_compiled_model_dir,
                                                                         batch_size = batch_size,
                                                                         user_batch_size = batch_size*10,
                                                                         num_cores = num_cores,
                                                                         use_cache=False, 
                                                                         warm_up=10)

        iter_ds = pd.concat([iter_ds, pd.DataFrame(iter_times, columns=[col_name(opt)])], axis=1)
        results = pd.concat([results, res], axis=1)

    display(results)

batch size: 16 core nums 1 compile start
-----------batch size: 16, num cores: 1----------
Compiling...
INFO:tensorflow:Restoring parameters from resnet50_saved_model/variables/variables
INFO:tensorflow:Froze 320 variables.
INFO:tensorflow:Converted 320 variables to const ops.
INFO:tensorflow:Number of operations in TensorFlow session: 4647
INFO:tensorflow:Number of operations after tf.neuron optimizations: 876
INFO:tensorflow:Number of operations placed on Neuron runtime: 874
INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: resnet50_inf1_saved_models/resnet50_batch_16_inf1_cores_1/saved_model.pb
INFO:tensorflow:Successfully converted resnet50_saved_model to resnet50_inf1_saved_models/resnet50_batch_16_inf1_cores_1
Compile time: 172.0479874610901
resnet50_inf1_saved_models/resnet50_batch_16_inf1_cores_1
{'OnNeuronRatio': 0.997716894977169}
----------- Done! ----------- 

batch size: 16 core nums 2 compile start
-----------bat

INFO:tensorflow:Number of operations in TensorFlow session: 4647
INFO:tensorflow:Number of operations after tf.neuron optimizations: 875
INFO:tensorflow:Number of operations placed on Neuron runtime: 0
INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: resnet50_inf1_saved_models/resnet50_batch_64_inf1_cores_1/saved_model.pb
Compile time: 222.82894659042358
resnet50_inf1_saved_models/resnet50_batch_64_inf1_cores_1
{'OnNeuronRatio': 0.0}
----------- Done! ----------- 

batch size: 64 core nums 2 compile start
-----------batch size: 64, num cores: 2----------
Compiling...
INFO:tensorflow:Restoring parameters from resnet50_saved_model/variables/variables
INFO:tensorflow:Froze 320 variables.
INFO:tensorflow:Converted 320 variables to const ops.
INFO:tensorflow:Number of operations in TensorFlow session: 4647
INFO:tensorflow:Number of operations after tf.neuron optimizations: 876
INFO:tensorflow:Number of operations placed on Neuron 

Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_16_compiled_cores_1,16,160,0.919,1.05853,8.41636,1186.47,580.991,151.219,171.633,161.814,64.0028


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_16_inf1_cores_2
Running model resnet50_inf1_saved_models/resnet50_batch_16_inf1_cores_2, user_batch_size: 160

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 160/1000. Average i/s 650.711581839887
Images 320/1000. Average i/s 649.8293346981217
Images 480/1000. Average i/s 650.2583839671977
Images 640/1000. Average i/s 649.639686236737
Images 800/1000. Average i/s 649.8227563930037
Images 960/1000. Average i/s 804.6659712251743


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_16_compiled_cores_2,16,160,0.918,1.56966,6.57142,804.666,409.692,224.237,247.232,245.875,92.2868


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_16_inf1_cores_3
Running model resnet50_inf1_saved_models/resnet50_batch_16_inf1_cores_3, user_batch_size: 160

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 160/1000. Average i/s 534.5110968191087
Images 320/1000. Average i/s 535.2149904219474
Images 480/1000. Average i/s 535.8608695326207
Images 640/1000. Average i/s 536.7788673234196
Images 800/1000. Average i/s 537.0094731036515
Images 960/1000. Average i/s 668.3766891170401


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_16_compiled_cores_3,16,160,0.918,1.89757,7.54165,668.377,347.575,271.081,298.161,297.509,109.846


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_16_inf1_cores_4
Running model resnet50_inf1_saved_models/resnet50_batch_16_inf1_cores_4, user_batch_size: 160

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 160/1000. Average i/s 573.570159696899
Images 320/1000. Average i/s 572.4403994473164
Images 480/1000. Average i/s 573.5204490249604
Images 640/1000. Average i/s 575.2353671869381
Images 800/1000. Average i/s 576.0243247774307
Images 960/1000. Average i/s 718.0061554809964


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_16_compiled_cores_4,16,160,0.917,1.7687,7.30009,718.006,375.698,252.672,280.613,275.877,101.918


Unnamed: 0,inf1_compiled_batch_size_16_compiled_cores_1,inf1_compiled_batch_size_16_compiled_cores_2,inf1_compiled_batch_size_16_compiled_cores_3,inf1_compiled_batch_size_16_compiled_cores_4
compiled_batch_size,16.0,16.0,16.0,16.0
user_batch_size,160.0,160.0,160.0,160.0
accuracy,0.919,0.918,0.918,0.917
prediction_time,1.05853,1.56966,1.89757,1.7687
wall_time,8.41636,6.57142,7.54165,7.30009
images_per_sec_mean,1186.47,804.666,668.377,718.006
images_per_sec_std,580.991,409.692,347.575,375.698
latency_mean,151.219,224.237,271.081,252.672
latency_99th_percentile,171.633,247.232,298.161,280.613
latency_median,161.814,245.875,297.509,275.877


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_32_inf1_cores_1
Running model resnet50_inf1_saved_models/resnet50_batch_32_inf1_cores_1, user_batch_size: 320

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 320/1000. Average i/s 917.544217768971
Images 640/1000. Average i/s 927.5564460541691
Images 960/1000. Average i/s 1381.3206216657386


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_32_compiled_cores_1,32,320,0.918,1.15191,11.2338,1381.32,907.646,287.977,347.06,342.381,116.677


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_32_inf1_cores_2
Running model resnet50_inf1_saved_models/resnet50_batch_32_inf1_cores_2, user_batch_size: 320

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 320/1000. Average i/s 895.1001793430731
Images 640/1000. Average i/s 897.2214866781342
Images 960/1000. Average i/s 1334.8267027983086


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_32_compiled_cores_2,32,320,0.92,1.19085,10.1797,1334.83,875.216,297.711,357.048,356.013,120.862


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_32_inf1_cores_3
Running model resnet50_inf1_saved_models/resnet50_batch_32_inf1_cores_3, user_batch_size: 320

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 320/1000. Average i/s 363.3270338063871
Images 640/1000. Average i/s 362.1930903275126
Images 960/1000. Average i/s 597.5390826670465


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_32_compiled_cores_3,32,320,0.919,2.89605,15.6802,597.539,470.695,724.014,882.01,880.751,245.478


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_32_inf1_cores_4
Running model resnet50_inf1_saved_models/resnet50_batch_32_inf1_cores_4, user_batch_size: 320

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 320/1000. Average i/s 556.6853079774876
Images 640/1000. Average i/s 557.7798585255388
Images 960/1000. Average i/s 924.1610302697818


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_32_compiled_cores_4,32,320,0.919,1.87931,11.5172,924.161,732.767,469.827,572.078,571.769,158.157


Unnamed: 0,inf1_compiled_batch_size_32_compiled_cores_1,inf1_compiled_batch_size_32_compiled_cores_2,inf1_compiled_batch_size_32_compiled_cores_3,inf1_compiled_batch_size_32_compiled_cores_4
compiled_batch_size,32.0,32.0,32.0,32.0
user_batch_size,320.0,320.0,320.0,320.0
accuracy,0.918,0.92,0.919,0.919
prediction_time,1.15191,1.19085,2.89605,1.87931
wall_time,11.2338,10.1797,15.6802,11.5172
images_per_sec_mean,1381.32,1334.83,597.539,924.161
images_per_sec_std,907.646,875.216,470.695,732.767
latency_mean,287.977,297.711,724.014,469.827
latency_99th_percentile,347.06,357.048,882.01,572.078
latency_median,342.381,356.013,880.751,571.769


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_64_inf1_cores_1
Running model resnet50_inf1_saved_models/resnet50_batch_64_inf1_cores_1, user_batch_size: 640

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.


In [9]:
inf1_model_dir = 'resnet50_inf1_saved_models'
saved_model_dir = 'resnet50_saved_model'


# testing batch size
batch_list = [64]
num_of_cores = [1,2,3,4]

inf1_model_dir = 'resnet50_inf1_saved_models'

for batch_size in batch_list:
    iter_ds = pd.DataFrame()
    results = pd.DataFrame()
    for num_cores in num_of_cores:
        opt ={'batch_size': batch_size, 'num_cores': num_of_cores}
        compiled_model_dir = f'resnet50_batch_{batch_size}_inf1_cores_{num_cores}'
        inf1_compiled_model_dir = os.path.join(inf1_model_dir, compiled_model_dir)

        print(f'inf1_compiled_model_dir: {inf1_compiled_model_dir}')
        col_name = lambda opt: f'inf1_{batch_size}_multicores_{num_cores}'

        res, iter_times = inf1_predict_benchmark_single_threaded(inf1_compiled_model_dir,
                                                                         batch_size = batch_size,
                                                                         user_batch_size = batch_size*10,
                                                                         num_cores = num_cores,
                                                                         use_cache=False, 
                                                                         warm_up=10)

        iter_ds = pd.concat([iter_ds, pd.DataFrame(iter_times, columns=[col_name(opt)])], axis=1)
        results = pd.concat([results, res], axis=1)

    display(results)

inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_64_inf1_cores_1
Running model resnet50_inf1_saved_models/resnet50_batch_64_inf1_cores_1, user_batch_size: 640

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.
INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Instructions for updating:
Use 

Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_64_compiled_cores_1,64,640,0.919,47.9022,357.486,28.9871,11.4617,23951.1,17254.5,23951.1,17254.5


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_64_inf1_cores_2
Running model resnet50_inf1_saved_models/resnet50_batch_64_inf1_cores_2, user_batch_size: 640

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 640/1000. Average i/s 787.8184209891378


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_64_compiled_cores_2,64,640,0.917,1.69328,17.1432,787.818,224.157,846.641,676.303,846.641,676.303


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_64_inf1_cores_3
Running model resnet50_inf1_saved_models/resnet50_batch_64_inf1_cores_3, user_batch_size: 640

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 640/1000. Average i/s 692.7102901055875


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_64_compiled_cores_3,64,640,0.917,1.93524,19.6588,692.71,208.219,967.62,761.956,967.62,761.956


inf1_compiled_model_dir: resnet50_inf1_saved_models/resnet50_batch_64_inf1_cores_4
Running model resnet50_inf1_saved_models/resnet50_batch_64_inf1_cores_4, user_batch_size: 640

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.
Images 640/1000. Average i/s 730.8802621805783


Unnamed: 0,compiled_batch_size,user_batch_size,accuracy,prediction_time,wall_time,images_per_sec_mean,images_per_sec_std,latency_mean,latency_99th_percentile,latency_median,latency_min
inf1_compiled_batch_size_64_compiled_cores_4,64,640,0.918,1.83603,19.3468,730.88,222.029,918.015,720.819,918.015,720.819


Unnamed: 0,inf1_compiled_batch_size_64_compiled_cores_1,inf1_compiled_batch_size_64_compiled_cores_2,inf1_compiled_batch_size_64_compiled_cores_3,inf1_compiled_batch_size_64_compiled_cores_4
compiled_batch_size,64.0,64.0,64.0,64.0
user_batch_size,640.0,640.0,640.0,640.0
accuracy,0.919,0.917,0.917,0.918
prediction_time,47.9022,1.69328,1.93524,1.83603
wall_time,357.486,17.1432,19.6588,19.3468
images_per_sec_mean,28.9871,787.818,692.71,730.88
images_per_sec_std,11.4617,224.157,208.219,222.029
latency_mean,23951.1,846.641,967.62,918.015
latency_99th_percentile,17254.5,676.303,761.956,720.819
latency_median,23951.1,846.641,967.62,918.015
