In [1]:
!pwd
!ls

# SageMaker Studio Code Editor working directory:
# /home/sagemaker-user/histology-image-analysis/sagemaker-inference

# SageMaker Notebook Instance working directory:
# /home/ec2-user/SageMaker/histology-image-analysis/sagemaker-inference

/home/sagemaker-user/histology-image-analysis/sagemaker-inference
MHIST_ViT_v13_dynamo_model.onnx  mhist-predict.ipynb  test_locally.py
__pycache__			 model.tar.gz
example_inference.py		 src


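SageMaker recommends the following `model.tar.gz` structure. (This project keeps its inference code in `src/` instead of `code/`; passing `source_dir='src'` to `PyTorchModel` makes the SageMaker SDK repackage it under `code/` at deploy time.)
```
model.tar.gz/
|- model.pth
|- code/
  |- inference.py
  |- requirements.txt  # only for PyTorch versions 1.3.1 and higher
```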

In [2]:
# Test inference locally
!pip install -U -q -r src/requirements.txt
%run test_locally.py

content_type application/json
Output: ('{"logit": 3.948834180831909, "predicted_class": "SSA", "probability": 0.9810874185378591}', 'application/json')


In [4]:
# Compress
# -c create archive
# -z gzip
# -v verbose
# -f to filename
!tar -czvf model.tar.gz MHIST_ViT_v13_dynamo_model.onnx src

MHIST_ViT_v13_dynamo_model.onnx
src/
src/inference.py
src/requirements.txt


In [9]:
# Uncompress
# -x extract
# -z gzip
# -v verbose
# -f from filename
!tar -xzvf model.tar.gz

MHIST_ViT_v13_dynamo_model.onnx
src/
src/inference.py
src/requirements.txt


In [5]:
# Push the packaged model archive to the SageMaker session's default S3 bucket.
# (If the SDK is missing or outdated, first run: !pip install -U sagemaker)
import boto3
import sagemaker

sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()

# upload_data() hands back the S3 URI of the uploaded object.
dest = sagemaker_session.upload_data(
    path="model.tar.gz",
    bucket=bucket,
    key_prefix="mhist-vit-model",
)

print(f"Model files uploaded to: {dest}")
# Example output:
# Model files uploaded to: s3://sagemaker-us-west-1-851725529671/mhist-vit-model/model.tar.gz

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
Model files uploaded to: s3://sagemaker-us-west-1-851725529671/mhist-vit-model/model.tar.gz


Deploy a pretrained PyTorch model:
- create a `PyTorchModel` object and set its `entry_point`
- call `deploy()` on the model, which returns a `PyTorchPredictor`

Calling `deploy()` creates a SageMaker Endpoint -- a hosted prediction service that we can use to perform inference.

In [7]:
# Create a PyTorch Endpoint from the SageMaker Python SDK's PyTorchModel
# to deploy a model trained outside of SageMaker.
# The AWS model server is natively integrated with TorchServe,
# an open-source project developed by AWS and Facebook to serve PyTorch models.

import json
import sagemaker
from sagemaker.pytorch import PyTorchModel
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()  # IAM execution role ARN of this notebook

# Derive the model location from the session's default bucket instead of
# hardcoding an account- and region-specific S3 URI. This matches where the
# upload step puts the archive (default bucket, key prefix 'mhist-vit-model').
MODEL_KEY_PREFIX = 'mhist-vit-model'
model_data = f's3://{sagemaker_session.default_bucket()}/{MODEL_KEY_PREFIX}/model.tar.gz'

# Configure the SageMaker PyTorch model server
model = PyTorchModel(
    # Model params
    model_data=model_data,
    role=role,
    source_dir='src',
    entry_point='inference.py',
    # Use the session created above rather than an implicit default one.
    sagemaker_session=sagemaker_session,

    # PyTorchModel params
    framework_version='2.3.0',
    py_version='py311',
    dependencies=['src/requirements.txt']
)

# deploy() creates a SageMaker Endpoint, a hosted prediction service, and
# returns a PyTorchPredictor, which runs inference on PyTorch Endpoints
# with the PyTorch model server configured above.
# By default the predictor serializes Python lists, dictionaries, and numpy
# arrays to .npy; JSON is used here instead for both request and response.
predictor = model.deploy(
    instance_type='ml.m5.xlarge',
    initial_instance_count=1,
    serializer=JSONSerializer(),  # overrides the default .npy request serializer
    deserializer=JSONDeserializer()  # overrides the default .npy -> numpy array response parser
)

# Test the endpoint: the request names an image by S3 bucket and key.
response = predictor.predict({
    'bucket': 'mhist-streamlit-app',
    'key': 'images/original/MHIST_aah.png'
})

# Expected output:
# {"logit": 3.948834180831909,
# "predicted_class": "SSA",
# "probability": 0.9810874185378591}
response

-----!

{'logit': 3.948834180831909,
 'predicted_class': 'SSA',
 'probability': 0.9810874185378591}

In [8]:
# Tear down the SageMaker Endpoint created by model.deploy() once testing is done.
predictor.delete_endpoint()