In [None]:
!pip install onnxruntime

In [1]:
import json
import tarfile
import time
from datetime import datetime

import boto3
import onnxruntime
from sagemaker import s3, session, get_execution_role

# Tar the model data and upload to S3

Package the ONNX model as a `.tar.gz` archive and upload it to the S3 bucket.

In [2]:
# S3 location and file names used by the cells below.
# Alternative: take the SageMaker session's default bucket instead of a fixed one.
# bucket = session.Session().default_bucket()
bucket = 'beyoung-sm-yolo5'
s3path = f's3://{bucket}/model/onnx'  # S3 prefix the model archive is uploaded under
fn_gz = 'yolov5s_onnx_model.tar.gz'  # local gzip-compressed tar archive to create
fn_model = 'yolov5s_detect.onnx'  # ONNX model file that goes into the archive

In [3]:
# Copy the ONNX model from ../0-yolov5-onnx into the current working directory.
# IPython substitutes {fn_model} with the value of the Python variable before running the shell command.
!cp ../0-yolov5-onnx/{fn_model} {fn_model}

In [4]:
# Bundle the ONNX file into a gzip-compressed tarball ...
with tarfile.open(name=fn_gz, mode='w:gz') as tar:
    tar.add(name=fn_model)

# ... then push the archive to S3 and keep the resulting object URI.
s3uri_model = s3.S3Uploader.upload(fn_gz, s3path)

In [5]:
# Display the S3 URI returned by the upload.
s3uri_model

's3://beyoung-sm-yolo5/model/onnx/yolov5s_onnx_model.tar.gz'

# Find ONNX model input name and shape

In [6]:
# Load the model with ONNX Runtime to read the signature of its first input.
# The variable is named `ort_session` so it does not shadow the `session`
# module imported from sagemaker at the top of the notebook.
ort_session = onnxruntime.InferenceSession(fn_model)

# Query the input list once and describe the first input.
model_inputs = ort_session.get_inputs()
first_input = model_inputs[0]

input_lens = len(model_inputs)
input_name = first_input.name
input_shape = first_input.shape
input_type = first_input.type

print(f'input_lens: {input_lens} \n input_name:{input_name} input_shape:{input_shape} input_type:{input_type}')

input_lens: 1 
 input_name:images input_shape:[1, 3, 640, 640] input_type:tensor(float)


# SageMaker Neo Compilation

In [7]:
# SageMaker API client and the IAM role the compilation job runs under.
client = boto3.client('sagemaker')
role = get_execution_role()

# NOTE: the timestamp has minute resolution, so two runs within the same
# minute produce the same job name.
job_name = 'yolo-onnx-neo-' + datetime.now().strftime("%Y-%m-%d-%H-%M")
s3uri_output = f's3://{bucket}/model/onnx/neo'

# Build the input-shape mapping with json.dumps so the value is always valid
# JSON; interpolating the Python list repr is only valid JSON when every
# dimension is an int. The separators keep the text identical to the previous
# format for integer shapes, e.g. {"images":[1, 3, 640, 640]}.
data_input = json.dumps({input_name: input_shape}, separators=(', ', ':'))

f'data_input: {data_input} job: {job_name}'

'data_input: {"images":[1, 3, 640, 640]} job: yolo-onnx-neo-2020-07-23-10-10'

In [8]:
# Submit the Neo compilation job: the ONNX archive on S3 is the input, the
# compiled artifact is written under s3uri_output for the 'deeplens' target,
# and both runtime and wait time are capped at 600 seconds.
response = client.create_compilation_job(
    CompilationJobName=job_name,
    RoleArn=role,
    InputConfig=dict(
        S3Uri=s3uri_model,
        DataInputConfig=data_input,
        Framework='ONNX',
    ),
    OutputConfig=dict(
        S3OutputLocation=s3uri_output,
        TargetDevice='deeplens',
    ),
    StoppingCondition=dict(
        MaxRuntimeInSeconds=600,
        MaxWaitTimeInSeconds=600,
    ),
)
response

{'CompilationJobArn': 'arn:aws:sagemaker:us-west-2:476271697919:compilation-job/yolo-onnx-neo-2020-07-23-10-10',
 'ResponseMetadata': {'RequestId': '8080dfcf-19a5-4146-a55a-1a5c49f17048',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '8080dfcf-19a5-4146-a55a-1a5c49f17048',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '111',
   'date': 'Thu, 23 Jul 2020 10:10:24 GMT'},
  'RetryAttempts': 0}}

# Describe the compilation job result

In [10]:
# The compilation job runs asynchronously, so poll until it reaches a terminal
# status instead of reading the result of a job that may still be in progress.
terminal_statuses = {'COMPLETED', 'FAILED', 'STOPPED'}
poll_seconds = 15
max_polls = 80  # 80 * 15 s = 20 minutes upper bound on waiting

for _ in range(max_polls):
    response = client.describe_compilation_job(
        CompilationJobName=job_name
    )
    if response['CompilationJobStatus'] in terminal_statuses:
        break
    time.sleep(poll_seconds)
else:
    # The loop ran out of attempts without seeing a terminal status.
    raise TimeoutError(f'compilation job {job_name} did not finish in time')

# Reason reported by SageMaker when the job failed; empty otherwise.
response.get('FailureReason', '')

"ClientError: OperatorNotImplemented:('The following operators are not supported for frontend ONNX: ScatterND, Range"

# References

* ONNX node argument [link](https://microsoft.github.io/onnxruntime/python/api_summary.html#onnxruntime.NodeArg)
* Model compilation with SageMaker Neo (MXNet MNIST example) [link](https://github.com/awslabs/amazon-sagemaker-examples/blob/master/sagemaker_neo_compilation_jobs/mxnet_mnist/mxnet_mnist_neo.ipynb)
* Hosting ONNX models with Amazon Elastic Inference [link](https://github.com/awslabs/amazon-sagemaker-examples/blob/master/sagemaker-python-sdk/mxnet_onnx_eia/mxnet_onnx_eia.ipynb)
