# **Deploy a pretrained, optimized ONNX model to SageMaker Endpoint**

In [1]:
# Git doesn't work well within the AWS Studio Code Editor space,
# so the repository is synced from inside the notebook instead.
# Pull the `main` branch from the `origin` remote to make sure the code is up-to-date:
!git pull origin main

From https://github.com/aquitzia/histology-image-analysis
 * branch            main       -> FETCH_HEAD
Already up to date.


In [2]:
# Print the working directory and list its contents. The cells below use
# relative paths (model.tar.gz, src/, test_locally.py), so the notebook has to
# run from the sagemaker-inference directory.
!pwd
!ls

# SageMaker Studio Code Editor working directory:
# /home/sagemaker-user/histology-image-analysis/sagemaker-inference

# SageMaker Notebook Instance working directory:
# /home/ec2-user/SageMaker/histology-image-analysis/sagemaker-inference

/home/sagemaker-user/histology-image-analysis/sagemaker-inference
mhist-predict.ipynb  model.tar.gz  src	test_locally.py


In [14]:
# # Download from s3 and uncompress
# import os
# import boto3

# s3 = boto3.client('s3')
# s3.download_file(Bucket='sagemaker-us-west-1-851725529671', Key='mhist-vit-model/model.tar.gz', Filename='model.tar.gz')

# # tar:
# # -x extract
# # -z gzip
# # -v verbose
# # -f from filename
# !tar -xzvf model.tar.gz

MHIST_ViT_v13_dynamo_model.onnx
src/
src/inference.py
src/requirements.txt


### Test and Upload model artifacts

SageMaker recommends the structure:
```
model.tar.gz/
|- model.pth
|- src/
  |- inference.py
  |- requirements.txt  # only for versions 1.3.1 and higher
```

In [16]:
# Test inference locally
!pip install -U -q -r src/requirements.txt
%run test_locally.py

content_type application/json
Output: ('{"logit": 3.948834180831909, "predicted_class": "SSA", "probability": 0.9810874185378591}', 'application/json')


In [4]:
# Compress the ONNX model and the src/ directory into model.tar.gz
# (any existing model.tar.gz in the working directory is overwritten).
# tar flags:
# -c create archive
# -z gzip
# -v verbose (lists each file as it is added)
# -f to filename
print('Archive contents:')
!tar -czvf model.tar.gz MHIST_ViT_v13_dynamo_model.onnx src
# Show the size and timestamp of the resulting archive
print('\nArchive info:')
!ls -lha model.tar.gz

MHIST_ViT_v13_dynamo_model.onnx
src/
src/inference.py
src/requirements.txt


In [43]:
# Push the packaged model archive to S3 so it can be referenced at deploy time
import boto3
import sagemaker

# Destination in S3: s3://<MODEL_BUCKET>/<S3_PREFIX>/<S3_FILENAME>
MODEL_BUCKET = 'mhist-artifacts-2024'
S3_PREFIX = 'sagemaker-model-artifacts'
# Also the name of the local archive in the working directory
S3_FILENAME = 'model.tar.gz'

sagemaker_session = sagemaker.Session()

# upload_data returns the S3 URI of the uploaded object
model_artifacts_archive = sagemaker_session.upload_data(
    key_prefix=S3_PREFIX,
    bucket=MODEL_BUCKET,
    path=S3_FILENAME,
)

# Expected:
# Model files uploaded to: s3://mhist-artifacts-2024/sagemaker-model-artifacts/model.tar.gz
print(f"Model files uploaded to: {model_artifacts_archive}")

Model files uploaded to: s3://mhist-artifacts-2024/sagemaker-model-artifacts/model.tar.gz


The output message states that the SageMaker SDK is using its built-in default settings rather than any custom configuration files, which it looks for at:
- `/etc/xdg/sagemaker/config.yaml`: system-wide config
- `/home/sagemaker-user/.config/sagemaker/config.yaml`: user-specific config

### Deploy PyTorchModel server and PyTorchPredictor Endpoint:
We will deploy a PyTorch model trained outside of SageMaker. The AWS PyTorchModel server is natively integrated with TorchServe, an open-source project developed by AWS and Facebook to serve PyTorch models.
1. Set up a SageMaker Python SDK PyTorchModel object, set an entry_point
2. Deploy the model to create a PyTorchPredictor, which manages a SageMaker Endpoint.
A SageMaker Endpoint is a hosted prediction service for performing inference.

In [49]:
# Define the SageMaker PyTorchModel
import sagemaker
from sagemaker.pytorch import PyTorchModel
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer
# model_artifacts_archive = 's3://sagemaker-us-west-1-851725529671/mhist-vit/model.tar.gz'

# Resolve the IAM execution role in this cell so that it is defined before
# PyTorchModel reads it (otherwise a fresh-kernel Run All raises NameError).
role = sagemaker.get_execution_role()

model = PyTorchModel(
    # Model params
    model_data=model_artifacts_archive,  # S3 URI of model.tar.gz from the upload cell
    role=role,
    source_dir='src',                    # local directory holding the inference code
    entry_point='inference.py',          # script inside source_dir

    # PyTorchModel params
    # framework_version + py_version select the serving container image
    # (e.g. pytorch-inference:2.3.0-cpu-py311 on a CPU instance)
    framework_version='2.3.0',
    py_version='py311',
    dependencies=['src/requirements.txt']
)

In [50]:
import json
import sagemaker

# IAM role the notebook runs under, e.g. arn:aws:iam::851725529671:role/SageMakerEx
role = sagemaker.get_execution_role()

# Session wrapper, plus the low-level SageMaker client it exposes
# (the client is used for the describe_* calls)
sagemaker_session = sagemaker.Session()
sagemaker_client = sagemaker_session.sagemaker_client

In [51]:
# Create a SageMaker Model, Endpoint, and Endpoint Configuration, run one
# prediction against the Endpoint, then tear the Endpoint down again.
import boto3

predictor = model.deploy( # returns a PyTorchPredictor
    instance_type='ml.m5.xlarge',
    initial_instance_count=1,
    serializer=JSONSerializer(), # Default serializes input data to .npy format
    deserializer=JSONDeserializer() # Default parses the response from .npy format to numpy array.
)

# Everything after a successful deploy runs inside try/finally so that the
# Endpoint (which is billed while it is up) is always deleted, even if one of
# the describe_* calls or the prediction raises.
try:
    # print(f"\nDeployed PyTorchModel: {model.name}")
    model_info = sagemaker_client.describe_model(ModelName=model.name)
    print('\nDeployed PyTorchModel:', model_info['ModelName'])
    print('Instance Recommendations:')
    # Recommendations are optional in the response; treat a missing or empty
    # entry as "no recommendations" instead of raising KeyError.
    recs = (model_info.get('DeploymentRecommendation') or {}).get('RealTimeInferenceRecommendations') or []
    for rec in recs:
        print('    -', rec['InstanceType'])

    print(f"\nPyTorchPredictor Endpoint: {predictor.endpoint_name}")
    endpoint_info = sagemaker_client.describe_endpoint(EndpointName=predictor.endpoint_name)
    for prod in endpoint_info['ProductionVariants']:
        for deployed in prod['DeployedImages']:
            print('Image pushed to ECR repo:', deployed['SpecifiedImage'])

    # Endpoint Configuration name matches Endpoint name
    # print(f"PyTorchPredictor Endpoint Configuration: {endpoint_info['EndpointConfigName']}")

    # Use PyTorchPredictor to run inference on an Endpoint (instance)
    # Predictor's default is to serialize Python lists, dictionaries, and numpy arrays
    # to multidimensional tensors for PyTorch inference.
    # Here, we pass the image path(s) for the computer vision model
    response = predictor.predict({
        'bucket': 'mhist-streamlit-app',
        'key': 'images/original/MHIST_aah.png'
    })
finally:
    # Delete Endpoint, which incurs significant fees to run
    predictor.delete_endpoint()
    print("\nDeleted Endpoint, Configuration, and Artifacts:")

    # Delete all artifacts from sagemaker.Session().default_bucket():
    # NOTE(review): this removes EVERY object returned for the default bucket
    # (first page, up to 1000 keys), not only the ones created by this
    # deployment. Confirm nothing else is stored there before running.
    s3 = boto3.client('s3')
    bucket = sagemaker_session.default_bucket() # 'sagemaker-us-west-1-851725529671'
    objects = s3.list_objects_v2(Bucket=bucket)
    for obj in objects.get('Contents', []):
        print(f"{obj['Key']} | LastModified: {obj['LastModified']} | Size: {obj['Size']}")
        s3.delete_object(Bucket=bucket, Key=obj['Key'])

# Expected output:
# {"logit": 3.948834180831909,
# "predicted_class": "SSA",
# "probability": 0.9810874185378591}
print('\n Model output:', response)

-----!
Deployed PyTorchModel: pytorch-inference-2024-07-22-10-29-54-289
Instance Recommendations:
    - ml.g4dn.xlarge
    - ml.g4dn.2xlarge
    - ml.c6i.xlarge

PyTorchPredictor Endpoint: pytorch-inference-2024-07-22-10-29-55-303
Image pushed to ECR repo: 763104351884.dkr.ecr.us-west-1.amazonaws.com/pytorch-inference:2.3.0-cpu-py311

Deleted Endpoint, Configuration, and Artifacts:
pytorch-inference-2024-07-22-10-29-21-491/model.tar.gz | LastModified: 2024-07-22 10:29:51+00:00 | Size: 318973566

 Model output: {'logit': 3.948834180831909, 'predicted_class': 'SSA', 'probability': 0.9810874185378591}


In [2]:
# Output:

# model artifact for PyTorchModel:
# mhist-vit/model.tar.gz LastModified: 2024-07-22 03:12:54+00:00 Size: 318708924

# model artifact for PyTorchModel Endpoint:
# pytorch-inference-2024-07-22-03-13-07-428/model.tar.gz LastModified: 2024-07-22 03:13:34+00:00 Size: 318973586

# s3.list_objects_v2 returns ResponseMetadata:
# RequestId- same as x-amz-request-id (below)
# HostId- host that responded (s3 id)
# HTTPStatusCode- 200 for success
# HTTPHeaders:
#       x-amz-id-2: s3 id
#       x-amz-request-id: AWS id for the request
#       date
#       x-amz-bucket-region
#       content-type
#       transfer-encoding: 'chunked' response
#       server: 'AmazonS3'
# RetryAttempts = 0
# IsTruncated
# Contents: (list of dicts)
#       Key
#       LastModified
#       ETag
#       Size
# StorageClass = 'STANDARD'
# Name = 'sagemaker-us-west-1-851725529671'
# Prefix = ''
# MaxKeys = 1000
# EncodingType = url
# KeyCount = 4

In [8]:
# # Optionally delete the SageMaker model, which doesn't incur charges
# # (or go to SageMaker Studio --> Models --> Deployable Models)
# model.delete_model()

# # If we delete the SageMaker model, delete the associated artifact in S3
# import boto3
# BUCKET = 'mhist-artifacts-2024'
# S3_PREFIX = 'sagemaker-model-artifacts'
# S3_FILENAME = 'model.tar.gz'
# s3 = boto3.client('s3')
# s3.delete_object(Bucket=BUCKET, Key=f"{S3_PREFIX}/{S3_FILENAME}")

# # sagemaker.Session() object doesn't use any other resources (besides notebook memory)

Also, remember to stop the Studio Instance:
- Close this notebook, then click **SageMaker Studio --> Running Instances --> Stop**
- When you stop the Studio instance, SageMaker will delete the associated EBS volume

To double-check, go to the EC2 console
- In the left sidebar, click Elastic Block Store --> Volumes
- Look for any volumes with a name starting with "sagemaker-"

Finally, check the **AWS Billing** dashboard for any resources that may have been left running accidentally.