# AWS Inferentia inference on Amazon EC2 Inf1 instance
This example demonstrates AWS Inferentia inference with TensorFlow and AWS Neuron SDK compiler and runtime

This example was tested on Amazon EC2 `inf1.xlarge` the following AWS Deep Learning AMI: 
`Deep Learning AMI (Ubuntu 18.04) Version 35.0`

Run this notebook using the following conda environment:
`aws_neuron_tensorflow_p36`

Prepare your imagenet validation TFRecord files using the following helper script:
https://github.com/tensorflow/models/blob/archive/research/inception/inception/data/download_and_preprocess_imagenet.sh

Save it to `/home/ubuntu/datasets/` or update the dataset location in the `get_dataset()` function

In [1]:
import os
import time
import shutil
import json
import requests
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.neuron as tfn
import tensorflow.compat.v1.keras as keras
from tensorflow.keras.applications import ( 
    xception,
    vgg16,
    vgg19,
    resnet,
    resnet50,
    resnet_v2,
    inception_v3,
    inception_resnet_v2,
    mobilenet,
    densenet,
    nasnet,
    mobilenet_v2
)
# from keras import backend as K
from tensorflow.keras.preprocessing import image
# from concurrent import futures
from itertools import compress


models = {
#     'xception':xception,
    'vgg16':vgg16,
#     'vgg19':vgg19,
#     'resnet50':resnet50,
#     'resnet101':resnet,
#     'resnet152':resnet,
#     'resnet50_v2':resnet_v2,
#     'resnet101_v2':resnet_v2,
#     'resnet152_v2':resnet_v2,
#     'resnext50':resnext,
#     'resnext101':resnext,
#     'inception_v3':inception_v3,
#     'inception_resnet_v2':inception_resnet_v2,
#     'mobilenet':mobilenet,
#     'densenet121':densenet,
#     'densenet169':densenet,
#     'densenet201':densenet,
#     'nasnet':nasnet,
#     'nasnet':nasnet,
#     'mobilenet_v2':mobilenet_v2
}

models_detail = {
#     'xception':xception.Xception(weights='imagenet'),
    'vgg16':vgg16.VGG16(weights='imagenet'),
#     'vgg19':vgg19.VGG19(weights='imagenet'),
#     'resnet50':resnet50.ResNet50(weights='imagenet'),
#     'resnet101':resnet.ResNet101(weights='imagenet'),
#     'resnet152':resnet.ResNet152(weights='imagenet'),
#     'resnet50_v2':resnet_v2.ResNet50V2(weights='imagenet'),
#     'resnet101_v2':resnet_v2.ResNet101V2(weights='imagenet'),
#     'resnet152_v2':resnet_v2.ResNet152V2(weights='imagenet'),
#     'resnext50':resnext.ResNeXt50(weights='imagenet'),
#     'resnext101':resnext.ResNeXt101(weights='imagenet'),
#     'inception_v3':inception_v3.InceptionV3(weights='imagenet'),
#     'inception_resnet_v2':inception_resnet_v2.InceptionResNetV2(weights='imagenet'),
#     'mobilenet':mobilenet.MobileNet(weights='imagenet'),
#     'densenet121':densenet.DenseNet121(weights='imagenet'),
#     'densenet169':densenet.DenseNet169(weights='imagenet'),
#     'densenet201':densenet.DenseNet201(weights='imagenet'),
#     'nasnet':nasnet.NASNetLarge(weights='imagenet'),
#     'nasnet':nasnet.NASNetMobile(weights='imagenet'),
#     'mobilenet_v2':mobilenet_v2.MobileNetV2(weights='imagenet')
}


print('test')

test


In [2]:
model_type = 'vgg16'

# https://github.com/tensorflow/tensorflow/issues/29931
temp = tf.zeros([8, 224, 224, 3])
_ = models[model_type].preprocess_input(temp)

print('test2')

test2


### Resnet50 FP32 saved model

In [3]:
# Export SavedModel

saved_model_dir = f'{model_type}_saved_model'
shutil.rmtree(saved_model_dir, ignore_errors=True)

model = models_detail[model_type]

model.save(saved_model_dir)

from tensorflow.keras.models import load_model
model = load_model(saved_model_dir, compile=True)

model.summary()

INFO:tensorflow:Assets written to: vgg16_saved_model/assets
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
b

### Compile models with different batch sizes and cores

In [4]:
def compile_inf1_model(saved_model_dir, inf1_model_dir, batch_size=1, num_cores=1, use_static_weights=False):
    print(f'-----------batch size: {batch_size}, num cores: {num_cores}----------')
    print('Compiling...')
    
    compiled_model_dir = f'{model_type}_batch_{batch_size}_inf1_cores_{num_cores}'
    inf1_compiled_model_dir = os.path.join(inf1_model_dir, compiled_model_dir)
    shutil.rmtree(inf1_compiled_model_dir, ignore_errors=True)

    example_input = np.zeros([batch_size,224,224,3], dtype='float32')
    !env NEURON_CC_FLAGS="--neuroncore-pipeline-cores=4"
    
    start_time = time.time()
    compiled_model = tfn.trace(model,example_input)
    compiled_res = compiled_model.save(inf1_compiled_model_dir)
    print(f'Compile time: {time.time() - start_time}')
    
    compile_success = False
#     perc_on_inf = compiled_res['OnNeuronRatio'] * 100
#     if perc_on_inf > 50:
#         compile_success = True
            
    print(inf1_compiled_model_dir)
    print(compiled_res)
    print('----------- Done! ----------- \n')
    
    return compile_success

In [6]:
inf1_model_dir = f'{model_type}_inf1_saved_models'
saved_model_dir = f'{model_type}_saved_model'


# testing batch size
batch_list = [64]
num_of_cores = [1]
for batch in batch_list:
    for core in num_of_cores:
        print('batch size:', batch,'core nums', core,'compile start')
        compile_inf1_model(saved_model_dir, inf1_model_dir, batch_size=batch, num_cores=core)


batch size: 64 core nums 1 compile start
-----------batch size: 64, num cores: 1----------
Compiling...
LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=00:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.Z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01

batch size 64 would create 784 MB cache demand; rewriting batch size to mitigate


INFO:tensorflow:Assets written to: vgg16_inf1_saved_models/vgg16_batch_64_inf1_cores_1/assets


INFO:tensorflow:Assets written to: vgg16_inf1_saved_models/vgg16_batch_64_inf1_cores_1/assets


Compile time: 263.71023321151733
vgg16_inf1_saved_models/vgg16_batch_64_inf1_cores_1
None
----------- Done! ----------- 

