## Libraries to be imported

In [1]:
import os
import pathlib
import tensorflow as tf
import numpy as np
from PIL import Image
from shutil import copytree, rmtree
from glob import glob
import boto3
import tarfile




In [2]:
# Bucket and key prefix under which the repackaged Model Zoo archives are stored.
# The default bucket name is account-specific, so both values can be overridden
# with the ZOO_BUCKET / ZOO_DIR environment variables instead of editing the notebook.
ZOO_BUCKET = os.environ.get('ZOO_BUCKET', 'sagemaker-us-east-1-113147044314')
ZOO_DIR = os.environ.get('ZOO_DIR', 'tf-model-zoo')

## Helper functions to copy [Tensorflow Model Zoo](https://github.com/tensorflow/models) object detection models to S3 and create SageMaker Models from them

SageMaker uses Tensorflow Serving, which requires the model to be in a folder structure like this:

```
model_dir/
  |
  +-version/
       |
       +-variables/
       |
       +-saved_model.pb
```

The models available in model zoo object detection are in a tar file, containing a folder structure in the following format:
```
model_dir/
  |
  +-<several files>
  |
  +-saved_model/
      |
      +-variables/
      |
      +-saved_model.pb
```

The `load_model_in_s3` function downloads the tar file from the zoo, unpacks it and takes the `saved_model` directory. It then restructures its content to the structure above, packs it into a `model.tar.gz` file which gets uploaded to the specified bucket and path, under a folder named after the model.

The `download_and_create_model` function continues the process, calling `load_model_in_s3` and then creating a [`sagemaker.tensorflow.serving.Model`](https://sagemaker.readthedocs.io/en/stable/sagemaker.tensorflow.html#tensorflow-serving-model) object from the `model.tar.gz` file uploaded to S3. It can take additional parameters, which will be passed on to the `Model` initializer.

In [3]:
def load_model_in_s3(model_name, s3_bucket=ZOO_BUCKET, s3_path=ZOO_DIR):
    """
    Downloads an object detection model from Tensorflow Model Zoo, reorganizes the directory structure to be compatible with SageMaker and Tensorflow Serving
    and copies it to the specified S3 location.

    The archive is built without changing the process working directory, so a failure
    while packaging cannot leave the notebook kernel in a different directory.

    params:
        **model_name**: exact name of the model as specified at https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
        **s3_bucket**:  name of an S3 bucket where to store the repackaged model.tar.gz
        **s3_path**:    path where to store the model.tar.gz inside the bucket. The model name will be appended to it as a folder, and the model.tar.gz will be uploaded to that destination.

    returns: the full S3 object path of the model.tar.gz file, with the format 's3://<s3_bucket>/<s3_path>/<model_name>/model.tar.gz'
    """
    base_url = 'http://download.tensorflow.org/models/object_detection/'
    model_file = model_name + '.tar.gz'
    extracted_dir = tf.keras.utils.get_file(
        fname=model_name,
        origin=base_url + model_file,
        cache_dir='/tmp',
        untar=True)

    saved_model_dir = pathlib.Path(extracted_dir) / "saved_model"
    staging_dir = f'/tmp/sm-models/{model_name}'
    archive_path = f'/tmp/sm-models/{model_name}.tar.gz'

    # Start from a clean staging area, then place the SavedModel under the
    # version folder ("1") that Tensorflow Serving expects.
    rmtree(staging_dir, ignore_errors=True)
    copytree(saved_model_dir.absolute().as_posix(), f'{staging_dir}/1')

    # `arcname` keeps archive members relative to the staging directory
    # (e.g. "1/saved_model.pb") without needing os.chdir().
    with tarfile.open(archive_path, "w:gz") as tar:
        for entry in sorted(glob(os.path.join(staging_dir, '*'))):
            tar.add(entry, arcname=os.path.basename(entry))

    s3 = boto3.client('s3')
    s3.upload_file(archive_path, Bucket=s3_bucket, Key=f'{s3_path}/{model_name}/model.tar.gz')
    return f's3://{s3_bucket}/{s3_path}/{model_name}/model.tar.gz'

In [4]:
from sagemaker.tensorflow.serving import Model
import sagemaker
import urllib

def download_and_create_model(model_name, bucket=None, bucket_path='tf-model-zoo', role=None, **kwargs):
    """
    Repackages a Tensorflow Model Zoo model into S3 and wraps it in a SageMaker Tensorflow Serving Model.

    params:
        **model_name**:  exact name of the model in the Tensorflow Model Zoo
        **bucket**:      S3 bucket for the repackaged archive; defaults to the SageMaker session's default bucket
        **bucket_path**: key prefix inside the bucket under which the model folder is created
        **role**:        IAM role for the model; defaults to the current SageMaker execution role
        **kwargs**:      passed through unchanged to the `Model` initializer

    returns: a `sagemaker.tensorflow.serving.Model` whose name is `model_name` with "_" and "." replaced by "-"
    raises:  ValueError if the download fails with an HTTP error
    """
    # `import urllib` alone does not guarantee that the `urllib.error` submodule is
    # loaded, so import it explicitly before referencing it in the except clause.
    import urllib.error

    if bucket is None:
        bucket = sagemaker.session.Session().default_bucket()
    if role is None:
        role = sagemaker.get_execution_role()
    try:
        model_tar = load_model_in_s3(model_name, bucket, bucket_path)
    except urllib.error.HTTPError as err:
        # NOTE(review): some TensorFlow versions may wrap HTTP errors raised inside
        # get_file in a generic Exception -- confirm this handler is still reached.
        raise ValueError(f'Model {model_name} not found on Tensorflow Model Zoo.') from err
    # Underscores and dots are replaced with dashes to build the SageMaker model name.
    adj_model_name = model_name.replace("_", "-").replace(".", "-")
    return Model(name=adj_model_name, model_data=model_tar, role=role, **kwargs)

## Helper Functions to Benchmark models on a directory of JPG images

These functions simply execute inference and return the prediction from one or several images. For a more elaborate version that loads the categories and displays the image with bounding boxes and probabilities, please refer to the [Object detection API demo notebook](https://github.com/tensorflow/models/blob/master/research/object_detection/object_detection_tutorial.ipynb) in the Tensorflow Model Zoo repo.

In [5]:
# Images that are known to consistently make some models fail; they are
# excluded from the benchmark batches.

bad_images = [
    f'../data/{image_id}.jpg'
    for image_id in (
        '000000001688',
        '000000002240',
        '000000000913',
        '000000004208',
        '000000000078',
        '000000000073',
        '000000002758',
        '000000003947',
    )
]

In [6]:
def load_images(image_path, max_images=10, skip_images=None):
    """
    Preloads an array of images from a path for faster inference.

    params:
        image_path:  directory that is searched (non-recursively) for `*.jpg` files
        max_images:  maximum number of images to return
        skip_images: optional collection of file paths (as produced by the glob, i.e.
                     `<image_path>/<name>.jpg`) that must not be loaded

    returns: a list of `(file_path, numpy_array)` tuples, in sorted file-name order
    """
    skipped = set(skip_images) if skip_images is not None else set()

    # Sort for a reproducible selection (glob order is unspecified) and filter
    # BEFORE truncating, so skipped files do not shrink the batch below max_images.
    candidates = [path for path in sorted(glob(f'{image_path}/*.jpg')) if path not in skipped]

    images = []
    for path in candidates[:max_images]:
        # The context manager releases the file handle once the pixels are copied.
        with Image.open(path) as img:
            images.append((path, np.array(img)))
    return images

def predict_images(model, image_path, max_images=10, skip_images=None):
    """
    Runs inference on the images found under a path.

    Loads the images with `load_images`, sends each one through `predict_image`,
    prints a list of messages for the files whose prediction came back empty and
    returns a list of `(file_path, prediction)` tuples.
    """
    predictions = []
    for imfile, image in load_images(image_path, max_images, skip_images):
        predictions.append((imfile, predict_image(model, image)))

    # An empty prediction means predict_image could not get a result for that file.
    missing = [f"{imfile} had no prediction" for imfile, output in predictions if len(output) == 0]
    print(missing)
    return predictions

def predict_image(model, image):
    """
    Runs inference on a single preloaded image.

    The image (an object exposing `tolist()`, e.g. a numpy array) is wrapped in the
    `{'instances': [...]}` request body and passed to `model.predict`. If the call
    raises, the exception is printed and an empty dict is returned instead.
    """
    payload = {'instances': [image.tolist()]}
    try:
        return model.predict(payload)
    except Exception as err:
        # Best effort: report the failure and signal "no prediction" to the caller.
        print(err)
        return {}

## Deploying a Model to SageMaker Tensorflow Serving

Once deployed, the model expects a request with the following `body` structure:
```
input = {
  'instances': [nested json list with all dimensions]
}
```

This nested structure can be obtained by converting an array to a list object, for instance using the `numpy.ndarray.tolist()` method. The response is a JSON structure like this:
```
{
  'predictions': [{'<prediction one>': [nested json array]}, {...},...]
}
```

## Benchmark helper functions

The functions below set up an inference endpoint, warm it up to reduce the impact of initial calls and profile the execution of a batch of images. They will be used for benchmarking models on several instance types.

In [24]:
import re
import pandas as pd
import cProfile
import io
import pstats
from collections import namedtuple
from time import sleep

In [60]:
def warmup_predictor(predictor, n_times=500, image_path='../data/000000004042.jpg', cooldown_seconds=5):
    """Warms up the predictor with a fixed image, to reduce the risk of erroneous performance measurements due to initializations and lazy loading.

    params:
        - predictor: object exposing `predict(body)` and an `endpoint` attribute
        - n_times: number of warm-up requests to send
        - image_path: image file used for every warm-up request
        - cooldown_seconds: pause, in seconds, after the warm-up requests. `time.sleep`
          takes seconds, so the previous hard-coded 5000 stalled for roughly 83 minutes.
    """
    with Image.open(image_path) as img:
        x = np.array(img)
    body = {'instances': [x.tolist()]}
    # Plain loop: the responses are not needed, so do not accumulate them.
    for _ in range(n_times):
        predictor.predict(body)
    sleep(cooldown_seconds)
    print(f'Warmup finished for {predictor.endpoint}.')


def profile_predictor(predictor, images, executions=1):
    """Profiles the execution of a predictor with a batch of preloaded images.

    params:
        - predictor: object exposing `predict(body)` and an `endpoint` attribute
        - images: iterable of `(file_path, array)` tuples; each array must expose `tolist()`
        - executions: number of times the whole batch is sent
    returns: the `cProfile.Profile` that recorded the prediction calls
    """
    pr = cProfile.Profile()
    # Build the request bodies up front so serialisation to lists is not profiled.
    bodies = [{'instances': [x.tolist()]} for _, x in images]
    pr.enable()
    try:
        for _ in range(executions):
            for body in bodies:
                predictor.predict(body)
    finally:
        # Always stop profiling, even when a prediction raises; otherwise the
        # profiler stays active for the remainder of the session.
        pr.disable()
    print(f'Profiling finished for {predictor.endpoint}.')
    return pr


def get_stats(profile, sort=['cumtime'], pct=.1):
    """Retrieves the profile statistics for a previous run, and returns the information in an object.

    The statistics are obtained by parsing the text report printed by `pstats.Stats.print_stats`.

    params:
        - profile: a profile object accepted by `pstats.Stats` (e.g. a `cProfile.Profile`)
        - sort: sort keys forwarded to `Stats.sort_stats`
        - pct: restriction forwarded to `Stats.print_stats`
    returns:
        Profile: a namedtuple containing:
            - calls: integer total number of function calls made (None if the summary line is missing)
            - total_seconds: float measurement of the total execution time in seconds (None if missing)
            - data: a Pandas DataFrame with one row per reported function. Values are kept as the
              strings printed by pstats. Columns: `ncalls`, `tottime`, `tottime_percall`, `cumtime`,
              `percall` (cumulative time per call) and `filename:lineno(function)`.
    """
    Profile = namedtuple('Profile', ['calls', 'total_seconds', 'data'])
    report = io.StringIO()
    pstats.Stats(profile, stream=report).sort_stats(*sort).print_stats(pct)

    calls = None
    total_seconds = None
    columns = []
    rows = []
    for line in report.getvalue().split("\n"):
        if not line.strip():
            continue

        if calls is None:
            # Summary line, e.g. "  123 function calls (120 primitive calls) in 0.005 seconds".
            # It is indented, so it must be searched rather than matched from the line start.
            summary = re.search(r'(\d+) function calls\b.*?\bin (\d+(?:\.\d+)?) seconds', line)
            if summary:
                calls = int(summary.group(1))
                total_seconds = float(summary.group(2))
            continue

        if not columns:
            # The table header is located by its content because its line number
            # depends on whether a "List reduced ..." line was printed.
            fields = line.split()
            if fields[0] == 'ncalls':
                # pstats prints two "percall" columns. The last one keeps the plain name
                # and the earlier one is prefixed with the column it refers to.
                last_position = {name: idx for idx, name in enumerate(fields)}
                columns = [
                    name if last_position[name] == idx else f'{fields[idx - 1]}_{name}'
                    for idx, name in enumerate(fields)
                ]
            continue

        # Limit the split so function names containing spaces
        # (e.g. "{built-in method builtins.sum}") stay in one piece.
        values = line.split(None, len(columns) - 1)
        rows.append(dict(zip(columns, values)))

    return Profile(calls, total_seconds, pd.DataFrame(rows))


def gen_model_instance_profile(model, instance, batch_size=100, executions=1):
    """Creates and profiles a predictor for the model and instance type requested.

    If anything fails after the endpoint has been deployed, the endpoint is deleted
    before the exception is re-raised, so a failed run does not leave a running
    endpoint behind with no handle to it.

    params:
        - model: name of the Tensorflow Model Zoo model to use
        - instance: a string defining an acceptable instance type for hosting a SageMaker endpoint ('ml.<family>.<size>')
        - batch_size: maximum number of images loaded from '../data' to run the benchmark on
        - executions: number of times the whole batch should be processed for profiling
    returns ProfileResults: a namedtuple containing:
            - model: the parameter described above
            - instance_type: the `instance` parameter described above
            - predictor: the predictor returned by deploying the model on the instance type passed. Its endpoint name is a combination of both.
            - executions: the parameter described above
            - calls: total number of function calls made, as reported by `get_stats`
            - total_seconds: total execution time, as reported by `get_stats`
            - data: a Pandas DataFrame containing the details of individual calls, cumulative time, executions, etc.
    """
    print(f"Starting profile for model {model} on {instance} with {batch_size} images...")
    model_instance = download_and_create_model(model, framework_version='2.1.0')
    print(f'Created model {model_instance.name}')

    # Endpoint names cannot contain dots, so the instance type is dash-separated.
    endpoint_name = f'{model_instance.name}-{instance.replace(".", "-")}'
    predictor = model_instance.deploy(
        initial_instance_count=1, instance_type=instance,
        endpoint_name=endpoint_name,
        update_endpoint=False)
    print(f"Endpoint {predictor.endpoint} created...")

    try:
        warmup_predictor(predictor)
        images = load_images(image_path='../data', max_images=batch_size, skip_images=bad_images)
        profile = profile_predictor(predictor, images, executions=executions)
        stats = get_stats(profile)
    except Exception:
        # The predictor is only returned on success; on failure nobody else holds a
        # reference to it, so release the endpoint here before propagating.
        predictor.delete_endpoint()
        raise

    ProfileResults = namedtuple('Profile', ['model', 'instance_type', 'predictor', 'executions', 'calls', 'total_seconds', 'data'])
    return ProfileResults(
        model=model,
        instance_type=instance,
        predictor=predictor,
        executions=executions,
        calls=stats.calls,
        total_seconds=stats.total_seconds,
        data=stats.data
    )

## Benchmark on the Prediction time for 100 images (images are from the COCO 2017 Validation Dataset)

The benchmark will be run for several models and several instance types, as listed below.

In [47]:
# Tensorflow Model Zoo models to benchmark.
models = [
    'faster_rcnn_resnet50_coco_2018_01_28',
    'ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03',
    'faster_rcnn_inception_resnet_v2_atrous_coco_2018_01_28',
]
# SageMaker hosting instance types each model is deployed on.
instance_types = [
    'ml.p3.2xlarge',
    'ml.p2.xlarge',
    'ml.c5.2xlarge',
    'ml.c5.4xlarge',
    'ml.m5.4xlarge',
]

The cell below runs a parallel test of each model on all the instance types defined for benchmarking.

**Note**: running all these instances in parallel incurs costs and can reach service quota limits. Plan your own tests carefully.

In [None]:
from itertools import product
from multiprocessing import Pool

def parallel_gen(*params):
    """Pool worker: profiles the (model, instance) pair received as the first positional argument."""
    job = params[0]
    model_name = job[0]
    instance_type = job[1]
    return gen_model_instance_profile(model_name, instance_type)

# One worker process per instance type; every (model, instance) combination is one job.
with Pool(processes=len(instance_types)) as executor:
    results = executor.map(
        parallel_gen,
        [list(pair) for pair in product(models, instance_types)],
    )

Starting profile for model faster_rcnn_resnet50_coco_2018_01_28 on ml.p2.xlarge with 100 images...
Starting profile for model faster_rcnn_resnet50_coco_2018_01_28 on ml.p3.2xlarge with 100 images...
Starting profile for model faster_rcnn_resnet50_coco_2018_01_28 on ml.c5.2xlarge with 100 images...
Starting profile for model faster_rcnn_resnet50_coco_2018_01_28 on ml.c5.4xlarge with 100 images...
Starting profile for model faster_rcnn_resnet50_coco_2018_01_28 on ml.m5.4xlarge with 100 images...
Starting profile for model ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03 on ml.p3.2xlarge with 100 images...
Created model faster-rcnn-resnet50-coco-2018-01-28


Using already existing model: faster-rcnn-resnet50-coco-2018-01-28


-Created model faster-rcnn-resnet50-coco-2018-01-28


Using already existing model: faster-rcnn-resnet50-coco-2018-01-28


-Created model ssd-resnet50-v1-fpn-shared-box-predictor-640x640-coco14-sync-2018-07-03
Starting profile for model ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03 on ml.p2.xlarge with 100 images...
Starting profile for model ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03 on ml.c5.2xlarge with 100 images...
Starting profile for model ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03 on ml.c5.4xlarge with 100 images...
Created model ssd-resnet50-v1-fpn-shared-box-predictor-640x640-coco14-sync-2018-07-03
Starting profile for model ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03 on ml.m5.4xlarge with 100 images...
Starting profile for model faster_rcnn_inception_resnet_v2_atrous_coco_2018_01_28 on ml.p3.2xlarge with 100 images...
Starting profile for model faster_rcnn_inception_resnet_v2_atrous_coco_2018_01_28 on ml.p2.xlarge with 100 images...
Created model ssd-resnet50-v1-fpn-shared-box-predicto

Using already existing model: faster-rcnn-inception-resnet-v2-atrous-coco-2018-01-28


Starting profile for model faster_rcnn_inception_resnet_v2_atrous_coco_2018_01_28 on ml.c5.4xlarge with 100 images...


In [None]:
# Display the list returned by the parallel benchmark run above.
results

## Deleting the Endpoints to save resources

In [None]:
# `predictor` only exists inside gen_model_instance_profile; at notebook level the
# predictors are carried by the benchmark results. Every run deployed its own
# endpoint, so each one has to be deleted.
for result in results:
    result.predictor.delete_endpoint()