## Libraries to be imported

In [74]:
import os
import pathlib
import tensorflow as tf
import numpy as np
from PIL import Image
from shutil import copytree, rmtree
from glob import glob
import boto3
import tarfile

In [65]:
# S3 destination for the repackaged model archives; these two values are the
# defaults of `load_model_in_s3`.
# NOTE(review): the bucket name looks specific to one AWS account/region —
# replace it with a bucket you own before running.
ZOO_BUCKET = "sagemaker-us-east-1-113147044314"
# Key prefix inside the bucket under which each model gets its own folder.
ZOO_DIR = "tf-model-zoo"

## Helper Function to copy [Tensorflow Model Zoo](https://github.com/tensorflow/models) object detection models to S3 for SageMaker deployment

SageMaker uses Tensorflow Serving, which requires the model to be in a folder structure like this:

```
model_dir/
  |
  +-version/
       |
       +-variables/
       |
       +-saved_model.pb
```

Each object detection model in the model zoo is distributed as a tar file containing a folder structure in the following format:
```
model_dir/
  |
  +-<several files>
  |
  +-saved_model/
      |
      +-variables/
      |
      +-saved_model.pb
```

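This function downloads the tar file from the zoo, unpacks it and takes the `saved_model` directory. It then restructures its content into the format required by TensorFlow Serving (using `1` as the version folder), packs the result into a new `.tar.gz` archive, uploads it to S3 as `model.tar.gz`, and returns the S3 URI of the uploaded archive.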

In [88]:
def load_model_in_s3(model_name, s3_bucket=ZOO_BUCKET, s3_path=ZOO_DIR):
    """Repackage a model-zoo object detection model for TensorFlow Serving and upload it to S3.

    The model archive is downloaded from the TensorFlow download server, its
    `saved_model` directory is copied into a version folder named `1`, the
    result is packed into a new gzip-compressed tar archive and uploaded to
    `s3://<s3_bucket>/<s3_path>/<model_name>/model.tar.gz`.

    Args:
        model_name: Name of the model in the zoo (the archive name without
            the `.tar.gz` suffix).
        s3_bucket: Destination S3 bucket.
        s3_path: Key prefix inside the bucket.

    Returns:
        The `s3://` URI of the uploaded `model.tar.gz`.
    """
    base_url = 'http://download.tensorflow.org/models/object_detection/'
    model_file = model_name + '.tar.gz'
    # get_file downloads the archive (cached under /tmp) and, because of
    # untar=True, extracts it.
    extracted_dir = tf.keras.utils.get_file(
        fname=model_name,
        origin=base_url + model_file,
        cache_dir='/tmp',
        untar=True)

    saved_model_dir = pathlib.Path(extracted_dir) / "saved_model"
    staging_dir = f'/tmp/sm-models/{model_name}'
    archive_path = f'/tmp/sm-models/{model_name}.tar.gz'

    # Start from a clean staging directory so files from an earlier run of the
    # same model do not end up in the archive.
    rmtree(staging_dir, ignore_errors=True)
    copytree(saved_model_dir.absolute().as_posix(), f'{staging_dir}/1')

    # Build the archive from absolute paths and set the in-archive name via
    # `arcname`. This avoids os.chdir(), which changes the working directory of
    # the whole kernel and would leave it changed if archiving raised.
    with tarfile.open(archive_path, "w:gz") as tar:
        for entry in sorted(os.listdir(staging_dir)):
            # Same selection as glob('*'): hidden entries are not archived.
            if entry.startswith('.'):
                continue
            tar.add(os.path.join(staging_dir, entry), arcname=entry)

    s3_key = f'{s3_path}/{model_name}/model.tar.gz'
    s3 = boto3.client('s3')
    s3.upload_file(archive_path, Bucket=s3_bucket, Key=s3_key)
    return f's3://{s3_bucket}/{s3_key}'

## Helper Functions to Benchmark models on a directory of JPG images

In [181]:
# Image files that the benchmark skips (passed as `skip_images`).
# NOTE(review): presumably images the endpoint cannot process — TODO confirm why
# these particular files are excluded.
bad_images = [
    f'../data/{image_id}.jpg'
    for image_id in (
        '000000001688',
        '000000002240',
        '000000000913',
        '000000004208',
        '000000000078',
        '000000000073',
        '000000002758',
        '000000003947',
    )
]

In [185]:
def predict_images(model, image_path, max_images=10, skip_images=None):
    """Run `predict_image` on up to `max_images` JPG files found in a directory.

    Args:
        model: Object passed through to `predict_image` (it must provide a
            `predict` method).
        image_path: Directory that is searched (non-recursively) for `*.jpg`.
        max_images: Maximum number of images to send for prediction.
        skip_images: Optional iterable of file paths (as produced by the
            `{image_path}/*.jpg` pattern) that must not be processed.

    Returns:
        A list of `(file path, prediction output)` tuples, one per processed
        image. The paths whose output is empty are also printed.
    """
    skipped = set(skip_images or ())
    # Sort for a reproducible selection (glob order depends on the filesystem)
    # and drop the skipped files *before* truncating, so that skipped files do
    # not reduce the number of images that actually get processed.
    candidates = [im_file
                  for im_file in sorted(glob(f'{image_path}/*.jpg'))
                  if im_file not in skipped]
    selected = candidates[:max_images]

    result = []
    for im_file in selected:
        # Decode one image at a time and release the file handle right away,
        # instead of keeping every decoded image in memory at once.
        with Image.open(im_file) as img:
            pixels = np.array(img)
        result.append((im_file, predict_image(model, pixels)))

    # Report the files for which no prediction was obtained.
    print([im_file for im_file, output in result if len(output) == 0])
    return result

def predict_image(model, image):
    """Send a single image to `model.predict` and return its response.

    The image is converted with `image.tolist()` and wrapped as
    `{'instances': [<nested list>]}`. If `model.predict` raises, the exception
    is printed and an empty dict is returned instead (best effort, so that one
    failing image does not abort a batch).
    """
    payload = {'instances': [image.tolist()]}
    try:
        return model.predict(payload)
    except Exception as err:
        print(err)
        return {}

## Deploying a Model to SageMaker Tensorflow Serving

Once deployed, the model expects a request with the following `body` structure:
```
input = {
  'instances': [nested json list with all dimensions]
}
```

This nested structure can be obtained by converting an array to a list object, for instance using the `numpy.ndarray.tolist()` method. The response is a JSON structure like this:
```
{
  'predictions': [{'<prediction one>': [nested json array]}, {...},...]
}
```

In [None]:
# NOTE(review): these imports live in this cell rather than in the import cell
# at the top of the notebook; `logging` is not used in any visible cell.
# NOTE(review): in SageMaker Python SDK v2 the serving `Model` class is exposed
# as `sagemaker.tensorflow.TensorFlowModel` — confirm which SDK version is used.
from sagemaker.tensorflow.serving import Model
from sagemaker import get_execution_role
import logging

# Role under which the model is created (taken from the execution environment).
role = get_execution_role()
# Repackage the zoo model and upload it; `sm_model` is the s3:// URI of model.tar.gz.
sm_model = load_model_in_s3('faster_rcnn_resnet50_coco_2018_01_28')
model = Model(model_data=sm_model, 
              role=role,
              framework_version='2.1.0')
# Creates a live endpoint backed by one ml.p2.xlarge instance; it stays up
# until `predictor.delete_endpoint()` is called.
predictor = model.deploy(initial_instance_count=1, instance_type='ml.p2.xlarge')

----

Sanity Check on the prediction endpoint

In [None]:
# Load one sample image as a NumPy array and display its shape; `x` is sent to
# the endpoint in the next cell.
x = np.array(Image.open('../data/000000004042.jpg'))
x.shape

In [None]:
# Send the sample image in the request format described above: the array is
# converted to a nested list and wrapped under the 'instances' key.
predictor.predict({'instances':[x.tolist()]})

## Benchmark on the Prediction time for 100 images (images are from the COCO 2017 Validation Dataset)

In [None]:
# Time a full prediction pass over the images in ../data (excluding `bad_images`).
# %timeit executes the statement several times, so the endpoint receives the
# whole batch once per repetition.
%timeit predict_images(predictor, '../data', max_images=100, skip_images=bad_images)

## Deleting the Endpoint to save resources

In [199]:
# Tear down the endpoint created by `model.deploy(...)` so it stops consuming resources.
predictor.delete_endpoint()