# Virtual Concierge

## Compile Pretrained MXNET model with NEO

In this notebook we will download a pre-trained MXNet model and compile it for the `ml_m4` and `deeplens` targets.

In [20]:
# Fetch the pre-trained model archive from S3 unless a local copy already
# exists (for example, one produced by a previous notebook).
import os

# Skip the download when model.tar.gz is already in the working directory.
if not os.path.exists('model.tar.gz'):
    # IPython shell escape: copy the archive into the current directory with the AWS CLI.
    !aws s3 cp s3://deeplens-virtual-concierge-model/mobilefacenet/model.tar.gz .

## Invoke Neo Compilation API

We then forward the model artifact to Neo Compilation API:

In [21]:
import boto3
import sagemaker
import time
from sagemaker.utils import name_from_base

# Neo compilation settings: framework, target hardware and the model's input shape.
framework, framework_version = 'MXNET', '1.2'
target_device = 'ml_m4'
data_shape = '{"data":[1,3,112,112]}'

# SageMaker context: execution role, session, region and default bucket.
role = sagemaker.get_execution_role()
sess = sagemaker.Session()
region, bucket = sess.boto_region_name, sess.default_bucket()

# The job name doubles as the S3 prefix under which this run's artifacts live.
compilation_job_name = name_from_base('virtual-concierge')
model_key = '{}/model/model.tar.gz'.format(compilation_job_name)
model_path = 's3://{}/{}'.format(bucket, model_key)
compiled_model_path = 's3://{}/{}/output'.format(bucket, compilation_job_name)

# Stage the local model archive in S3 so the compilation job can read it.
model_bucket = boto3.resource('s3').Bucket(bucket)
model_bucket.upload_file('model.tar.gz', model_key)

sm_client = boto3.client('sagemaker')

INFO:sagemaker:Created S3 bucket: sagemaker-us-east-1-423079281568


In [22]:
# Submit the model artifact to the Neo compilation API.
response = sm_client.create_compilation_job(
    CompilationJobName=compilation_job_name,
    RoleArn=role,
    InputConfig={
        'S3Uri': model_path,
        'DataInputConfig': data_shape,
        'Framework': framework
    },
    OutputConfig={
        'S3OutputLocation': compiled_model_path,
        'TargetDevice': target_device
    },
    StoppingCondition={
        'MaxRuntimeInSeconds': 300
    }
)
print(response)

# Poll every 30 sec until the job reaches a terminal state.
while True:
    response = sm_client.describe_compilation_job(CompilationJobName=compilation_job_name)
    status = response['CompilationJobStatus']
    if status == 'COMPLETED':
        break
    # STOPPED is terminal too; without this check a stopped job would be
    # polled forever. Surface the service-provided reason when there is one.
    if status in ('FAILED', 'STOPPED'):
        raise RuntimeError('Compilation {}: {}'.format(
            status.lower(), response.get('FailureReason', 'no reason reported')))
    print('Compiling ...')
    time.sleep(30)
print('Done!')

# Extract compiled model artifact
compiled_model_path = response['ModelArtifacts']['S3ModelArtifacts']

{'CompilationJobArn': 'arn:aws:sagemaker:us-east-1:423079281568:compilation-job/virtual-concierge-2019-04-03-00-00-36-900', 'ResponseMetadata': {'RequestId': 'f397848a-d9b1-427e-b14e-f022b83e906b', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'f397848a-d9b1-427e-b14e-f022b83e906b', 'content-type': 'application/x-amz-json-1.1', 'content-length': '122', 'date': 'Wed, 03 Apr 2019 00:00:36 GMT'}, 'RetryAttempts': 0}}
Compiling ...
Done!


## Create prediction endpoint

To create a prediction endpoint, we first specify two additional functions, to be used with Neo Deep Learning Runtime:

* `neo_preprocess(payload, content_type)`: Function that takes in the payload and Content-Type of each incoming request and returns a NumPy array. Here, the payload is a byte-encoded NumPy array, so the function simply decodes the bytes to obtain the NumPy array.
* `neo_postprocess(result)`: Function that takes the prediction results produced by Deep Learning Runtime and returns the response body.

In [23]:
# IPython shell escape: print predict.py with syntax highlighting via the pygmentize CLI.
!pygmentize predict.py

[34mdef[39;49;00m [32mmodel_fn[39;49;00m(model_dir, prefered_batch_size=[34m1[39;49;00m, image_size=([34m112[39;49;00m,[34m112[39;49;00m)):
    [33m"""Function responsible for loading the model.[39;49;00m
[33m    Args:[39;49;00m
[33m        model_dir (str): The directory where model files are stored[39;49;00m
[33m    Returns:[39;49;00m
[33m        mxnet.mod.Module: the loaded model.[39;49;00m
[33m    """[39;49;00m
    
    [34mimport[39;49;00m [04m[36mmxnet[39;49;00m [34mas[39;49;00m [04m[36mmx[39;49;00m
    [34mimport[39;49;00m [04m[36mos[39;49;00m
    [34mimport[39;49;00m [04m[36mlogging[39;49;00m
    
    logging.info([33m'[39;49;00m[33mInvoking model load[39;49;00m[33m'[39;49;00m)    
    
    data_shapes = [([33m'[39;49;00m[33mdata[39;49;00m[33m'[39;49;00m, (prefered_batch_size, [34m3[39;49;00m, image_size[[34m0[39;49;00m], image_size[[34m1[39;49;00m]))]

    sym, args, aux = mx.model.load_checkpoint(os.pa

Upload the Python script containing the two functions to S3:

In [24]:
import tarfile

# Package the inference script as a gzipped tarball.
with tarfile.open('sourcedir.tar.gz', 'w:gz') as archive:
    archive.add('predict.py')

# Upload the archive under this run's S3 prefix and record its URI.
source_key = '{}/source/sourcedir.tar.gz'.format(compilation_job_name)
source_bucket = boto3.resource('s3').Bucket(bucket)
source_bucket.upload_file('sourcedir.tar.gz', source_key)
source_path = 's3://{}/{}'.format(bucket, source_key)

We then create a SageMaker model record:

In [25]:
from sagemaker.model import NEO_IMAGE_ACCOUNT
from sagemaker.fw_utils import create_image_uri

# The target device is spelled with underscores; the model name gets the
# hyphenated form and the image lookup gets the dotted form.
model_name = name_from_base('virtual-concierge') + target_device.replace('_', '-')
neo_framework = 'neo-' + framework.lower()
dotted_target = target_device.replace('_', '.')

image_uri = create_image_uri(
    region,
    neo_framework,
    dotted_target,
    framework_version,
    py_version='py3',
    account=NEO_IMAGE_ACCOUNT[region]
)

# Container definition: Neo image, compiled model artifact, and the location
# of the uploaded inference script.
primary_container = {
    'Image': image_uri,
    'ModelDataUrl': compiled_model_path,
    'Environment': { 'SAGEMAKER_SUBMIT_DIRECTORY': source_path }
}

response = sm_client.create_model(
    ModelName=model_name,
    PrimaryContainer=primary_container,
    ExecutionRoleArn=role
)
print(response)

{'ModelArn': 'arn:aws:sagemaker:us-east-1:423079281568:model/virtual-concierge-2019-04-03-00-01-08-702ml-m4', 'ResponseMetadata': {'RequestId': '9e3291f4-89bc-4dcf-b20c-2c7faa5dfeee', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '9e3291f4-89bc-4dcf-b20c-2c7faa5dfeee', 'content-type': 'application/x-amz-json-1.1', 'content-length': '108', 'date': 'Wed, 03 Apr 2019 00:01:09 GMT'}, 'RetryAttempts': 0}}


Then we create an Endpoint Configuration:

In [26]:
# The endpoint configuration reuses the model's name.
config_name = model_name

# One production variant: a single ml.m4.xlarge instance with weight 1.0.
default_variant = {
    'VariantName': 'default-variant-name',
    'ModelName': model_name,
    'InitialInstanceCount': 1,
    'InstanceType': 'ml.m4.xlarge',
    'InitialVariantWeight': 1.0
}

response = sm_client.create_endpoint_config(
    EndpointConfigName=config_name,
    ProductionVariants=[default_variant]
)
print(response)

{'EndpointConfigArn': 'arn:aws:sagemaker:us-east-1:423079281568:endpoint-config/virtual-concierge-2019-04-03-00-01-08-702ml-m4', 'ResponseMetadata': {'RequestId': 'ecd563ba-2cc2-43b8-8066-7d503a7cb1a0', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'ecd563ba-2cc2-43b8-8066-7d503a7cb1a0', 'content-type': 'application/x-amz-json-1.1', 'content-length': '127', 'date': 'Wed, 03 Apr 2019 00:01:09 GMT'}, 'RetryAttempts': 0}}


Finally, we create an Endpoint:

In [None]:
# The endpoint reuses the model's name as well.
endpoint_name = model_name

# Request endpoint creation and show the immediate API response.
print(sm_client.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=config_name
))

# Block on the 'endpoint_in_service' waiter before describing the endpoint.
print('Creating endpoint ...')
in_service_waiter = sm_client.get_waiter('endpoint_in_service')
in_service_waiter.wait(EndpointName=endpoint_name)

response = sm_client.describe_endpoint(EndpointName=endpoint_name)
print(response)

{'EndpointArn': 'arn:aws:sagemaker:us-east-1:423079281568:endpoint/virtual-concierge-2019-04-03-00-01-08-702ml-m4', 'ResponseMetadata': {'RequestId': '823dd7ff-09d1-418f-843d-21e6a72be2a7', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '823dd7ff-09d1-418f-843d-21e6a72be2a7', 'content-type': 'application/x-amz-json-1.1', 'content-length': '114', 'date': 'Wed, 03 Apr 2019 00:01:09 GMT'}, 'RetryAttempts': 0}}
Creating endpoint ...


## Send requests

Download a sample picture, detect the first face

In [None]:
import boto3
import json
import base64
import io
import PIL.Image

s3 = boto3.resource('s3')

# Read image from s3
image = {
    'S3Object': {
        'Bucket': 'aiml-lab-sagemaker',
        'Name': 'politicians/politicians2.jpg'
    }
}

image_object = s3.Object(image['S3Object']['Bucket'], image['S3Object']['Name'])
payload = image_object.get()['Body'].read()

rekognition = boto3.client('rekognition')

# Call rekognition to get bbox
ret = rekognition.detect_faces(
    Image={
        'Bytes': payload
    },
    Attributes=['DEFAULT'],
)

# Fail with a clear message instead of a bare IndexError when the image
# contains no detectable face; later cells index FaceDetails[0] as well.
if not ret['FaceDetails']:
    raise ValueError('No faces detected in s3://{}/{}'.format(
        image['S3Object']['Bucket'], image['S3Object']['Name']))

# Bounding box of the first detected face
print(ret['FaceDetails'][0]['BoundingBox'])

Crop the image at the bounding box, resize it, and convert it to bytes ready for inference

In [None]:
def crop_image(payload, bbox, image_size=(112, 112)):
    """Decode an image, optionally crop it to a relative bounding box, and resize it.

    Args:
        payload (bytes): Encoded image data readable by ``PIL.Image.open``.
        bbox (dict or None): Box with ``Left``, ``Top``, ``Width`` and ``Height``
            keys, each multiplied by the image width/height to obtain pixel
            coordinates. When ``None`` the image is not cropped.
        image_size (tuple): Target size passed to ``Image.resize``.

    Returns:
        The RGB image, cropped when ``bbox`` is given, resized to ``image_size``.
    """
    f = io.BytesIO(payload)
    # Load image and convert to RGB space
    image = PIL.Image.open(f).convert('RGB')
    # Crop relative to image size
    if bbox is not None:
        width, height = image.size
        # Scale the relative box to absolute pixel coordinates.
        x1 = int(bbox['Left'] * width)
        y1 = int(bbox['Top'] * height)
        x2 = int(bbox['Left'] * width + bbox['Width'] * width)
        y2 = int(bbox['Top'] * height + bbox['Height'] * height)
        image = image.crop((x1, y1, x2, y2))
    # Resize
    return image.resize(image_size)

# Crop the first detected face out of the current payload bytes
image = crop_image(payload, ret['FaceDetails'][0]['BoundingBox'])

# Re-encode the resized face as PNG bytes.
# NOTE: this overwrites `payload`, so re-running this cell would crop the
# already-cropped image; re-run the download cell first.
png_buffer = io.BytesIO()
image.save(png_buffer, format='PNG')
payload = png_buffer.getvalue()

# Display the cropped face
image

Send the payload to the endpoint, and output the face embedding response

In [None]:
%%time

import json
import numpy as np

sm_runtime = boto3.Session().client('sagemaker-runtime')

# Send the current payload bytes to the endpoint as an image request.
image_request = {
    'EndpointName': endpoint_name,
    'ContentType': 'application/x-image',
    'Body': payload,
}
response = sm_runtime.invoke_endpoint(**image_request)
print(response)

In [None]:
# Parse the JSON response body into an array and preview its first ten entries.
# NOTE(review): the body is read here; presumably it cannot be read twice, so
# re-running this cell alone may fail -- confirm.
embedding = np.array(json.loads(response['Body'].read().decode()))
embedding[:10]

Send a saved NumPy payload to the endpoint

In [None]:
%%time

import json

# SageMaker runtime client used below to invoke the endpoint
sm_runtime = boto3.Session().client('sagemaker-runtime')

def numpy_bytes_serializer(data):
    """Serialize ``data`` with ``np.save`` and return the resulting bytes.

    Args:
        data: Array (or array-like) accepted by ``np.save``.

    Returns:
        bytes: The complete contents written by ``np.save``.
    """
    import io
    import numpy as np

    # Write into an in-memory buffer, then hand back everything it holds.
    buffer = io.BytesIO()
    np.save(buffer, data)
    return buffer.getvalue()

# Load the saved array and serialize it into the request body.
# NOTE(review): input.npy is not created anywhere in the visible notebook;
# it presumably has to exist in the working directory already -- confirm.
model_input = np.load('input.npy')
payload = numpy_bytes_serializer(model_input)

npy_request = {
    'EndpointName': endpoint_name,
    'ContentType': 'application/x-npy',
    'Body': payload,
}
response = sm_runtime.invoke_endpoint(**npy_request)
print(response)

In [None]:
# Parse the JSON response body into an array and preview its first ten entries.
embedding = np.array(json.loads(response['Body'].read().decode()))
embedding[:10]

## Clean up

Tear down the Neo endpoint, configuration and model

In [13]:
# Delete the endpoint created above
sm_client.delete_endpoint(EndpointName=endpoint_name)

{'ResponseMetadata': {'RequestId': '24821821-0187-483f-aaea-875acd2141fe',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '24821821-0187-483f-aaea-875acd2141fe',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Mon, 01 Apr 2019 08:16:13 GMT'},
  'RetryAttempts': 0}}

In [14]:
# Delete the endpoint configuration created above
sm_client.delete_endpoint_config(EndpointConfigName=config_name)

{'ResponseMetadata': {'RequestId': 'cff53739-4f1b-4398-922a-1d68b86a422e',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'cff53739-4f1b-4398-922a-1d68b86a422e',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Mon, 01 Apr 2019 08:16:13 GMT'},
  'RetryAttempts': 0}}

In [15]:
# Delete the SageMaker model record created above
sm_client.delete_model(ModelName=model_name)

{'ResponseMetadata': {'RequestId': 'ae6e41a0-72e2-45c1-81ad-1a1e4781b315',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'ae6e41a0-72e2-45c1-81ad-1a1e4781b315',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Mon, 01 Apr 2019 08:16:13 GMT'},
  'RetryAttempts': 0}}