## Summary

Deploys a fastai model to a SageMaker endpoint using TorchServe.  This notebook can be run on a CPU-based SageMaker notebook instance.

This is based on other guides on the internet that use [PyTorch 1.0](https://course19.fast.ai/deployment_amzn_sagemaker.html) and [PyTorch 1.4](https://github.com/mattmcclean/fastai2-sagemaker-deployment-demo/blob/master/fastai2_deploy_sagemaker_demo.ipynb).  This guide uses the newer TorchServe deployment mechanism, which is only available in PyTorch >= 1.6.


In [None]:
# install fastai deps
! pip install -Uqq fastbook

In [None]:
# check versions
! pip list | egrep 'fast|torch|sagemaker'

# fastai >= 2.1.9      (we want a modern fastai v2)
# sagemaker >= 2.19.0  (should be sagemaker v2, might need to upgrade via pip)
# torch >= 1.7.1       (make sure torch meets minimum requirements of fastai)
# torchvision >= 0.8.2 (make sure torchvision meets minimum requirements of fastai)

# see environment.yml of the fastai version you are using for required dependencies: https://github.com/fastai/fastai/blob/master/environment.yml

In [None]:
import sagemaker
import tarfile
from fastai.vision.all import *
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer
from sagemaker.pytorch import PyTorchModel

## Train

In [None]:
# download/extract the pets dataset and point at its 'images' folder
path = untar_data(URLs.PETS)/'images'

# create the image data loader
# the label is the text captured before the trailing "_<digits>.jpg" of each file name;
# the dot is escaped so it only matches a literal '.' instead of any character
dls = ImageDataLoaders.from_path_re(path, get_image_files(path), pat=r'(.+)_\d+\.jpg$',
                                    item_tfms=RandomResizedCrop(460, min_scale=0.75), bs=64,
                                    batch_tfms=[*aug_transforms(size=299, max_warp=0),
                                                Normalize.from_stats(*imagenet_stats)])

In [None]:
# Keep training minimal so the notebook stays usable on a CPU instance:
# a resnet18 backbone, one frozen epoch and zero unfrozen epochs.
learn = cnn_learner(dls, arch=resnet18, metrics=error_rate)
learn.fine_tune(epochs=0, freeze_epochs=1)

## Upload saved model

In [None]:
# file must be named model.pth to work with torchserve running on framework 1.6.0
model_export = 'model.pth'

# export learner to path
# must do it this way so it includes the model weights and architecture
# save_model and torch.save will not work
# pass the variable instead of repeating the literal so the archive and
# clean-up cells always refer to the file that is written here
learn.export(model_export)

In [None]:
# Display the learner's base path: this is where the model.pth file gets saved,
# and where the archive cell reads it from (learn.path/model_export).
learn.path

In [None]:
# Bundle the exported learner into a gzipped tarball (.tar.gz) for upload.
# The archive must hold model.pth at its root and nothing else.
path = Path('.')
model_archive = 'pets_model.tar.gz'
with tarfile.open(name=path/model_archive, mode='w:gz') as archive:
    archive.add(name=learn.path/model_export, arcname=model_export)

In [None]:
# Open a SageMaker session and upload the model archive under the demo key prefix.
sess = sagemaker.Session()
prefix = 'DEMO-fastai2-sagemaker-oxford-pets'
archive_path = str(path/model_archive)
model_location = sess.upload_data(archive_path, key_prefix=prefix)
# show the location returned for the uploaded archive
model_location

In [None]:
# Remove the local archive and then the exported learner; both have been uploaded already.
for leftover in (path/model_archive, learn.path/model_export):
    Path(leftover).unlink()

## Deploy local endpoint

In [None]:
# execution role that is handed to the SageMaker model below
role = sagemaker.get_execution_role()

# sagemaker v2 style: serialization is configured with dedicated objects
json_serializer = JSONSerializer()
json_deserializer = JSONDeserializer()

In [None]:
# framework_version=1.6.0 is the latest supported pytorch and the only one compatible with new torchserve deployment model
# pytorch 1.7.1 will be installed via requirements.txt
# serve.py is the script that will load the model and process predictions

# collect the constructor arguments first so they are easy to scan and tweak
pytorch_model_args = dict(
    model_data=model_location,
    role=role,
    framework_version='1.6.0',
    py_version='py36',
    entry_point='serve.py',
    source_dir='scripts',
)
model = PyTorchModel(**pytorch_model_args)

In [None]:
# test docker container locally
# may fail due to lack of space on device
# if so, then:
#  - clean out all /tmp/tmp* files
#  - docker rm all containers (use docker ps -a to list)

# instance_type='local' is what keeps the deployment on this machine
local_deploy_args = dict(
    initial_instance_count=1,
    instance_type='local',
    serializer=json_serializer,
    deserializer=json_deserializer,
    content_type='application/json',
)
predictor = model.deploy(**local_deploy_args)

In [None]:
# test inference: send a JSON payload holding the URL of a sample image
sample_request = {"url": "https://cdn1-www.cattime.com/assets/uploads/2011/12/file_2744_british-shorthair-460x290-460x290.jpg"}
response = predictor.predict(sample_request)
response

In [None]:
# Tear down the local endpoint; use this to stop the local docker container
# that was started by the local deploy cell.
predictor.delete_endpoint()

## Deploy remote endpoint

In [None]:
# Drop the objects from the local deployment; if they linger, model.deploy() will still happen locally.
del predictor, model

In [None]:
# Build a fresh model object for the remote deployment (same settings as the local one).
remote_model_args = dict(
    model_data=model_location,
    role=role,
    framework_version='1.6.0',
    py_version='py36',
    entry_point='serve.py',
    source_dir='scripts',
)
model = PyTorchModel(**remote_model_args)

In [None]:
# specify a real instance_type to create a remote endpoint
remote_deploy_args = dict(
    initial_instance_count=1,
    instance_type='ml.t2.large',
    serializer=json_serializer,
    deserializer=json_deserializer,
    content_type='application/json',
)
predictor = model.deploy(**remote_deploy_args)

In [None]:
# Send the sample image URL to the remote endpoint and show what comes back.
remote_request = {"url": "https://cdn1-www.cattime.com/assets/uploads/2011/12/file_2744_british-shorthair-460x290-460x290.jpg"}
response = predictor.predict(remote_request)
response

In [None]:
# Delete the remote endpoint once you are done testing, or it will cost you money.
predictor.delete_endpoint()