# **Deploy a pretrained, optimized ONNX model to SageMaker Endpoint**

In [2]:
# Sync this checkout with the remote before running the rest of the notebook.
# Git doesn't work well within the AWS Studio Code Editor space, so pull
# explicitly from the `main` branch of `origin` to make sure the code is up-to-date:
!git pull origin main

From https://github.com/aquitzia/histology-image-analysis
 * branch            main       -> FETCH_HEAD
Already up to date.


In [4]:
# Key prefix and file name of the model archive (used by the upload cell).
S3_PREFIX = 'mhist-vit'
S3_FILENAME = 'model.tar.gz'

# Show the working directory and its contents: the relative paths used in this
# notebook (model.tar.gz, src/, test_locally.py) are resolved against it.
!pwd
!ls

# SageMaker Studio Code Editor working directory:
# /home/sagemaker-user/histology-image-analysis/sagemaker-inference

# SageMaker Notebook Instance working directory:
# /home/ec2-user/SageMaker/histology-image-analysis/sagemaker-inference

/home/sagemaker-user/histology-image-analysis/sagemaker-inference
MHIST_ViT_v13_dynamo_model.onnx  model.tar.gz  test_locally.py
mhist-predict.ipynb		 src


In [14]:
# # Optional: download a previously uploaded archive from S3 and uncompress it.
# # NOTE(review): the bucket name and key are hard-coded, and the key prefix
# # ('mhist-vit-model') differs from the 'mhist-vit' prefix used by the upload
# # cell in this notebook -- confirm the key before uncommenting.
# import os
# import boto3

# s3 = boto3.client('s3')
# s3.download_file(Bucket='sagemaker-us-west-1-851725529671', Key='mhist-vit-model/model.tar.gz', Filename='model.tar.gz')

# # tar:
# # -x extract
# # -z gzip
# # -v verbose
# # -f archive filename to read from
# !tar -xzvf model.tar.gz

MHIST_ViT_v13_dynamo_model.onnx
src/
src/inference.py
src/requirements.txt


### Test and Upload model artifacts

SageMaker recommends the structure:
```
model.tar.gz/
|- model.pth
|- src/
  |- inference.py
  |- requirements.txt  # only for versions 1.3.1 and higher
```

In [16]:
# Test inference locally
!pip install -U -q -r src/requirements.txt
%run test_locally.py

content_type application/json
Output: ('{"logit": 3.948834180831909, "predicted_class": "SSA", "probability": 0.9810874185378591}', 'application/json')


In [4]:
# Compress the ONNX model file and the src/ directory into model.tar.gz.
# Both are added at the root of the archive (tar -v prints each entry as it is added).
# tar flags:
# -c create archive
# -z gzip
# -v verbose
# -f to filename
print('Archive contents:')
!tar -czvf model.tar.gz MHIST_ViT_v13_dynamo_model.onnx src
print('\nArchive info:')
# -l long format, -h human-readable sizes, -a include hidden entries
!ls -lha model.tar.gz

MHIST_ViT_v13_dynamo_model.onnx
src/
src/inference.py
src/requirements.txt


In [20]:
# Upload the model archive to the SageMaker session's default S3 bucket.
# !pip install -U sagemaker
import boto3
import sagemaker

# Local archive to upload and the key prefix it is stored under
S3_PREFIX = 'mhist-vit'
S3_FILENAME = 'model.tar.gz'

sagemaker_session = sagemaker.Session()
# Resolved to 'sagemaker-us-west-1-851725529671' when this notebook was run
bucket = sagemaker_session.default_bucket()

# The value returned by upload_data() is printed below and later passed to
# PyTorchModel as model_data.
model_path = sagemaker_session.upload_data(
    S3_FILENAME,
    bucket=bucket,
    key_prefix=S3_PREFIX,
)

print(f"Model files uploaded to: {model_path}")
# Example output:
# Model files uploaded to: s3://sagemaker-us-west-1-851725529671/mhist-vit/model.tar.gz

Model files uploaded to: s3://sagemaker-us-west-1-851725529671/mhist-vit/model.tar.gz


The output message states that the SageMaker SDK is using its built-in default settings rather than a custom configuration. Custom configuration files, if any, are located at:
- `/etc/xdg/sagemaker/config.yaml`: system-wide config
- `/home/sagemaker-user/.config/sagemaker/config.yaml`: user-specific config

In [None]:
# List all model artifacts in S3.
# A single list_objects_v2 call returns at most 1,000 keys, so walk every page
# with a paginator to keep both the count and the listing complete.
s3 = boto3.client('s3')
artifacts = [
    obj
    for page in s3.get_paginator('list_objects_v2').paginate(Bucket=bucket)
    for obj in page.get('Contents', [])  # a page for an empty bucket has no 'Contents' key
]
print(f"{len(artifacts)} model artifacts remaining in S3 bucket: {bucket}")
for obj in artifacts:
    print(f"{obj['Key']} LastModified: {obj['LastModified']} Size: {obj['Size']}")

### Deploy PyTorchModel server and PyTorchPredictor Endpoint:
- create a PyTorchModel object and set an entry_point
- deploy a PyTorchPredictor

This creates a SageMaker Endpoint -- a hosted prediction service that we can use to perform inference.

In [25]:
# Configure a PyTorchModel (SageMaker Python SDK) so that a PyTorch Endpoint can
# be created for a model that was trained outside of SageMaker.
# The AWS Model server is natively integrated with TorchServe,
# an open-source project developed by AWS and Facebook to serve PyTorch models.

import json
import sagemaker
from sagemaker.pytorch import PyTorchModel
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

sagemaker_session = sagemaker.Session()
# Execution role of this notebook, e.g. arn:aws:iam::851725529671:role/SageMakerEx
role = sagemaker.get_execution_role()
# model_path comes from the upload cell; uncomment to point at an existing archive instead:
# model_path = 's3://sagemaker-us-west-1-851725529671/mhist-vit/model.tar.gz'

# Describe the SageMaker PyTorch model server (nothing is deployed in this cell)
model = PyTorchModel(
    # Where the model archive lives and which IAM role is used
    model_data=model_path,
    role=role,

    # Inference code: entry_point is resolved inside source_dir
    entry_point='inference.py',
    source_dir='src',
    dependencies=['src/requirements.txt'],

    # Serving container selection
    framework_version='2.3.0',
    py_version='py311',
)
# NOTE(review): model.name may still be unset at this point and only be assigned
# once deploy() creates the SageMaker model -- confirm.
print(f"\nPyTorchModel: {model.name}")

-----!
PyTorchModel: pytorch-inference-2024-07-22-03-19-10-318
PyTorchPredictor Endpoint: pytorch-inference-2024-07-22-03-19-11-113


{'logit': 3.948834180831909,
 'predicted_class': 'SSA',
 'probability': 0.9810874185378591}

In [None]:
# List all model artifacts in S3.
# A single list_objects_v2 call returns at most 1,000 keys, so walk every page
# with a paginator to keep both the count and the listing complete.
s3 = boto3.client('s3')
artifacts = [
    obj
    for page in s3.get_paginator('list_objects_v2').paginate(Bucket=bucket)
    for obj in page.get('Contents', [])  # a page for an empty bucket has no 'Contents' key
]
print(f"{len(artifacts)} model artifacts remaining in S3 bucket: {bucket}")
for obj in artifacts:
    print(f"{obj['Key']} LastModified: {obj['LastModified']} Size: {obj['Size']}")

In [None]:
# Deploy the PyTorchModel configured above.
# deploy() creates a SageMaker Endpoint (a hosted prediction service) and
# returns a PyTorchPredictor that sends inference requests to it.
# JSON (de)serializers are passed explicitly, so requests and responses travel
# as JSON instead of the default .npy format.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.xlarge',
    serializer=JSONSerializer(),  # the default would serialize input data to .npy format
    deserializer=JSONDeserializer(),  # the default would parse a .npy response into a numpy array
)
print(f"\nPyTorchModel: {model.name}")
print(f"PyTorchPredictor Endpoint: {predictor.endpoint_name}")

# Test Endpoint with a single request.
# presumably inference.py loads the image from this S3 bucket/key -- verify against src/inference.py
sample_request = {
    'bucket': 'mhist-streamlit-app',
    'key': 'images/original/MHIST_aah.png',
}
response = predictor.predict(sample_request)

# Expected output:
# {"logit": 3.948834180831909,
# "predicted_class": "SSA",
# "probability": 0.9810874185378591}
response

### Cleanup

In [None]:
# Tear down the SageMaker Endpoint: a running endpoint incurs significant fees.
# delete_endpoint_config=True is the SDK default, spelled out here so it is clear
# that the endpoint configuration is removed together with the endpoint.
predictor.delete_endpoint(delete_endpoint_config=True)

In [3]:
# List model artifacts in S3 (uses the `s3` client and `bucket` created in earlier cells).
# A single list_objects_v2 call returns at most 1,000 keys, so walk every page
# with a paginator to keep both the count and the listing complete.
artifacts = [
    obj
    for page in s3.get_paginator('list_objects_v2').paginate(Bucket=bucket)
    for obj in page.get('Contents', [])  # a page for an empty bucket has no 'Contents' key
]
print(f"{len(artifacts)} model artifacts remaining in S3 bucket: {bucket}")
for obj in artifacts:
    print(f"{obj['Key']} LastModified: {obj['LastModified']} Size: {obj['Size']}")

# Output:

# model artifact for PyTorchModel:
# mhist-vit/model.tar.gz LastModified: 2024-07-22 03:12:54+00:00 Size: 318708924

# model artifact for PyTorchModel Endpoint:
# pytorch-inference-2024-07-22-03-13-07-428/model.tar.gz LastModified: 2024-07-22 03:13:34+00:00 Size: 318973586

# Each page returned by list_objects_v2 is a dict with:
# ResponseMetadata:
#       RequestId- same as x-amz-request-id (below)
#       HostId- host that responded (s3 id)
#       HTTPStatusCode- 200 for success
#       HTTPHeaders:
#             x-amz-id-2: s3 id
#             x-amz-request-id: AWS id for the request
#             date
#             x-amz-bucket-region
#             content-type
#             transfer-encoding: 'chunked' response
#             server: 'AmazonS3'
#       RetryAttempts = 0
# IsTruncated- True when more keys remain on later pages
# Contents: (list of dicts, absent when the page has no keys)
#       Key
#       LastModified
#       ETag
#       Size
#       StorageClass = 'STANDARD'
# Name = 'sagemaker-us-west-1-851725529671'
# Prefix = ''
# MaxKeys = 1000
# EncodingType = url
# KeyCount- number of keys returned in this page

3 model artifacts remaining in S3 bucket: sagemaker-us-west-1-851725529671
mhist-vit/model.tar.gz LastModified: 2024-07-22 03:12:54+00:00 Size: 318708924
pytorch-inference-2024-07-22-03-13-07-428/model.tar.gz LastModified: 2024-07-22 03:13:34+00:00 Size: 318973586
pytorch-inference-2024-07-22-03-18-34-828/model.tar.gz LastModified: 2024-07-22 03:19:04+00:00 Size: 318973480


In [None]:
# List all Endpoint Configurations.
# list_endpoint_configs returns results one page at a time, so iterate with a
# paginator to see every configuration rather than only the first page.
# The pages are consumed directly in the loop instead of being stored in
# `response`, which holds the endpoint's prediction from the test cell.
sagemaker_client = sagemaker_session.sagemaker_client
config_pages = sagemaker_client.get_paginator('list_endpoint_configs').paginate()

for page in config_pages:
    for endpoint_config in page['EndpointConfigs']:
        print(f"EndpointConfigName: {endpoint_config['EndpointConfigName']}, CreationTime: {endpoint_config['CreationTime']}")

In [8]:
# Optional cleanup steps, kept commented out so that "Run All" never deletes anything.

# # Optionally delete the SageMaker model, which doesn't incur charges
# # (or go to SageMaker Studio --> Models --> Deployable Models)
# model.delete_model()

# # Delete model artifact from sagemaker.Session().default_bucket()
# S3_PREFIX = 'mhist-vit'
# S3_FILENAME = 'model.tar.gz'
# s3.delete_object(Bucket=bucket, Key=f"{S3_PREFIX}/{S3_FILENAME}")

# # Delete all artifacts from sagemaker.Session().default_bucket():
# # WARNING: this removes every object listed in the bucket, not only the
# # artifacts created by this notebook.
# # NOTE(review): a single list_objects_v2 call returns at most 1,000 keys, so a
# # larger bucket would need this repeated (or a paginator) -- confirm before use.
# objects = s3.list_objects_v2(Bucket=bucket)
# for obj in objects.get('Contents', []):
#     s3.delete_object(Bucket=bucket, Key=obj['Key'])

# # sagemaker.Session() object doesn't use any other resources (besides notebook memory)

Also, remember to stop the Studio Instance:
- Close this notebook, then click **SageMaker Studio --> Running Instances --> Stop**
- When you stop the Studio instance, SageMaker will delete the associated EBS volume

To double-check, go to the EC2 console
- In the left sidebar, click Elastic Block Store --> Volumes
- Look for any volumes with a name starting with "sagemaker-"

Review the **AWS Billing** dashboard for any resources that might have been left running accidentally.