# Deploying to SageMaker Native Containers

Amazon SageMaker AI provides prebuilt Docker images that include deep learning frameworks and other dependencies needed for training and inference. 

With the SageMaker Python SDK, you can train and deploy models using these popular deep learning frameworks. For instructions on installing and using the SDK, see Amazon SageMaker Python SDK. The following is a list of the available frameworks:
* TensorFlow
* MXNet
* PyTorch
* Chainer
* Hugging Face

In this notebook, we will deploy Computer Vision models to a SageMaker prebuilt containers.


## Prerequisites

In [4]:
import boto3
from IPython.core.display import HTML
import base64
import json

region = boto3.Session().region_name

s3 = boto3.client("s3")

endpoint_cleanup_list = []

### Create helper functions

In [5]:
def download_from_s3(key_filenames):
    s3_bucket = f"jumpstart-cache-prod-{region}"
    key_prefix = "inference-notebook-assets"
    for key_filename in key_filenames:
        s3.download_file(s3_bucket, f"{key_prefix}/{key_filename}", key_filename)


def query_endpoint(endpoint_name, img, jumpstart_flag=True):
    if jumpstart_flag:
        content_type = "application/x-image"
        accept = 'application/json;verbose'
    else:
        content_type = "image/x-image"
        accept = 'application/json;'
    client = boto3.client('runtime.sagemaker')
    response = client.invoke_endpoint(EndpointName=endpoint_name, ContentType=content_type, Body=img, Accept=accept)
    return response


def parse_prediction(query_response, jumpstart_flag=True):
    model_predictions = json.loads(query_response['Body'].read())
    if jumpstart_flag:
        predicted_label = model_predictions['predicted_label']
        labels = model_predictions['labels']
        probabilities = model_predictions['probabilities']
        return predicted_label, probabilities, labels
    else:
        predicted_label = model_predictions[0]["label"]
        labels = [i["label"] for i in model_predictions]
        probabilities = [i["score"] for i in model_predictions]
        return predicted_label, probabilities, labels


def predict_top_k_labels(probabilities, labels,k, jumpstart_flag=True):
    topk_prediction_ids = sorted(range(len(probabilities)), key=lambda index: probabilities[index], reverse=True)[:k]
    topk_class_labels = ", ".join([labels[id] for id in topk_prediction_ids])
    return topk_class_labels


def display_images_with_prediction (endpoint_name, images: dict, jumpstart_flag = True):
    for filename, img in images.items():
        query_response = query_endpoint(endpoint_name, img, jumpstart_flag)
        predicted_label, probabilities, labels = parse_prediction(query_response, jumpstart_flag)
        top5_class_labels = predict_top_k_labels(probabilities, labels, 5, jumpstart_flag)
        display(HTML(f'<img src={filename} alt={filename} align="left" style="width: 250px;"/>' 
                     f'<figcaption>Predicted Label is : {predicted_label}</figcaption>'
                    f'<figcaption>Top-5 model predictions are: {top5_class_labels}</figcaption>'))


def cleanup_endpoint(endpoint_name):
    try:
        sagemaker_client = boto3.client('sagemaker')
    
        endpoint_response = sagemaker_client.describe_endpoint(
            EndpointName=endpoint_name
        )
        endpoint_config_name = endpoint_response['EndpointConfigName']
    
        sagemaker_client.delete_endpoint(
                        EndpointName=endpoint_name
                    )
        sagemaker_client.delete_endpoint_config(
                            EndpointConfigName=endpoint_config_name
                        )
    except:
        print("Skipping deletion of ", endpoint_name)

### Prepare sample images for testing

In [6]:
images_list = ["cat.jpg", "dog.jpg", "boxer_dog.jpg"]
download_from_s3(key_filenames=images_list)
images = {}
for f in images_list:
    with open(f, 'rb') as file: images[f] = file.read()

## 1. Deploy from SageMaker Jump Start
Amazon SageMaker JumpStart provides a streamlined way to deploy and fine-tune machine learning models, including pre-trained foundation models and built-in algorithms. When you deploy a model through JumpStart, SageMaker handles the underlying infrastructure, including the use of Docker containers to package and run the model and its dependencies.\n
In this notebook, we will deploy EfficientNetV2-ImageNet21 model, which is an image classifier, to a CPU instance.

In [7]:
import sagemaker
from sagemaker.jumpstart.model import JumpStartModel
from sagemaker import image_uris, model_uris, script_uris

session = sagemaker.Session()

model_id = "tensorflow-ic-efficientnet-v2-imagenet21k-ft1k-m"
model_version = "*"
instance_type = "ml.c5.4xlarge"

# JumpStart
model = JumpStartModel(
    model_id=model_id,
    model_version=model_version,
    instance_type=instance_type,
    sagemaker_session=session
)

# Deploy the model
# The endpoint name will be automatically generated if not specified
predictor = model.deploy(
    initial_instance_count=1,
    wait=True
)

jumpstart_endpoint_name = predictor.endpoint_name
endpoint_cleanup_list.append(jumpstart_endpoint_name)

print(f"Model deployed successfully!")
print(f"Endpoint name: {jumpstart_endpoint_name}")

Using model 'tensorflow-ic-efficientnet-v2-imagenet21k-ft1k-m' with wildcard version identifier '*'. You can pin to version '2.0.18' for more stable results. Note that models may have different input/output signatures after a major version upgrade.


-----!Model deployed successfully!
Endpoint name: tf-ic-efficientnet-v2-imagenet21k-ft1k--2025-08-27-15-58-38-802


In [8]:
# Test
display_images_with_prediction(jumpstart_endpoint_name, images)

## 2. Deploy Hugging Face models
Amazon SageMaker AI lets customers train, fine-tune, and run inference using Hugging Face models for Natural Language Processing (NLP) on SageMaker AI. You can use Hugging Face for both training and inference. The following section provides information on Hugging Face models and includes reference material you can use to learn how to use Hugging Face with SageMaker AI.
<br/><br/>
This functionality is available through the development of Hugging Face [AWS Deep Learning Containers](https://docs.aws.amazon.com/deep-learning-containers/latest/devguide/what-is-dlc.html). These containers include Hugging Face Transformers, Tokenizers and the Datasets library, which allows you to use these resources for your training and inference jobs. For a list of the available Deep Learning Containers images, see [Available Deep Learning Containers Images](https://github.com/aws/deep-learning-containers/blob/master/available_images.md). These Deep Learning Containers images are maintained and regularly updated with security patches.
<br/><br/>
In this example, we will deploy ResNet-50 which is another image classification model, to Hugging Face container.

In [9]:
import sagemaker
from sagemaker.huggingface.model import HuggingFaceModel
from  sagemaker.base_serializers import DataSerializer

session = sagemaker.Session()
role = sagemaker.get_execution_role()

# Model parameters
model_id = "microsoft/resnet-50"
task = "image-classification"

# Specify the container with HuggingFace transformers
hub = {
    'HF_MODEL_ID': model_id,
    'HF_TASK': task,
    'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
    'SAGEMAKER_REGION': session.boto_region_name
}

# Create HuggingFace Model
huggingface_model = HuggingFaceModel(
    env=hub,                      # Configuration for loading model from Hub
    role=role,                    # IAM role with required permissions
    transformers_version="4.26.0", # Transformers version
    pytorch_version="1.13.1",     # PyTorch version
    py_version="py39",            # Python version
)


# Deploy the model
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.c5.4xlarge",  # You can change the instance type based on your needs
    endpoint_name=f"{model_id.replace('/','-')}-{session.boto_region_name}",  # Optional: specify endpoint name
    image_serializer=DataSerializer(content_type='image/x-image')
)


hf_endpoint_name = predictor.endpoint_name
endpoint_cleanup_list.append(hf_endpoint_name)

print(f"Model deployed successfully!")
print(f"Endpoint name: {hf_endpoint_name}")


-----!Model deployed successfully!
Endpoint name: microsoft-resnet-50-us-west-2


In [10]:
display_images_with_prediction(hf_endpoint_name, images, jumpstart_flag=False)

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received client error (400) from primary with message "{
  "code": 400,
  "type": "InternalServerException",
  "message": "Content type application/json; is not supported by this framework.\n\n            Please implement input_fn to to deserialize the request data or an output_fn to\n            serialize the response. For more information, see the SageMaker Python SDK README."
}
". See https://us-west-2.console.aws.amazon.com/cloudwatch/home?region=us-west-2#logEventViewer:group=/aws/sagemaker/Endpoints/microsoft-resnet-50-us-west-2 in account 440577664410 for more information.

## Deploy to PyTorch Container
In this example, we will deploy a model artifact to PyTorch container. 

In [None]:
# Download pretained weights

import torchvision.models as models
import torch
import os

# Download ResNet-50 with ImageNet pre-trained weights
model = models.resnet50(pretrained=True)

os.makedirs("resnet50", exist_ok=True)
os.makedirs("resnet50/code", exist_ok=True)
# Save the model's state dictionary
torch.save(model.state_dict(), 'resnet50/model.pth')


The inference.py file is essential for a SageMaker PyTorch container because it defines how your trained PyTorch model should be loaded and how it should perform inference when deployed as a SageMaker endpoint.
<br/><br/>
Key functions provided by inference.py:<br/>
```model_fn(model_dir)```:<br/>
This function is responsible for loading your serialized PyTorch model from the model_dir (where your model.tar.gz is extracted) into memory. This is where you would define the model architecture and load the saved weights.<br/>
```input_fn(request_body, content_type)```:<br/>
This function handles the deserialization of incoming inference requests. It takes the raw request body and its content type (e.g., JSON, CSV, NPY) and transforms it into a format suitable for your PyTorch model (e.g., a torch.Tensor).<br/>
```predict_fn(input_object, model)```:<br/>
This function performs the actual inference. It takes the preprocessed input from input_fn and the loaded model from model_fn, and then runs the prediction logic, returning the model's output.<br/>
```output_fn(prediction, accept_type)```:<br/>
This function serializes the model's predictions into the desired format for the client. It takes the output from predict_fn and the client's requested accept_type (e.g., JSON) and formats the response accordingly.<br/>

In [None]:
%%writefile resnet50/code/inference.py
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image
import io
import os
import json

def model_fn(model_dir):
    """
    Load the PyTorch model from the model_dir.
    """
    model = models.resnet50(pretrained=False) # Or load your specific ResNet50 architecture
    # Load the state dictionary
    with open(os.path.join(model_dir, "resnet50", 'model.pth'), 'rb') as f:
        model.load_state_dict(torch.load(f))
    model.eval()
    return model

def input_fn(request_body, content_type):
    """
    Preprocess the input data for inference.
    """
    if content_type == 'image/x-image':
        return Image.open(io.BytesIO(request_body))
    else:
        raise Exception(f"Unsupported content type: {content_type}")

def predict_fn(input_object, model):
    """
    Perform inference on the input data.
    """
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    input_tensor = preprocess(input_object).unsqueeze(0)
    with torch.no_grad():
        output = model(input_tensor)
    return output


# The output_fn takes the prediction result and the requested content type
def output_fn(prediction, accept_type):
    """
    Formats the prediction output.
    """
    # Assuming `prediction` is a tensor of shape (1, num_classes)
    # Get the class with the highest probability
    score, predicted_class = torch.max(prediction, 1)

    # Get the human-readable class label (assuming you have a mapping)
    # For a real-world scenario, you would load this from a file
    # during model_fn.
    # For this example, we will just return the class index.
    imagenet_classes = models.ResNet50_Weights.DEFAULT.meta["categories"]
    output_dict = {
        'label': imagenet_classes[predicted_class.item()],
        'score': score.item()
    }

    if accept_type == "application/json":
        return json.dumps([output_dict]), accept_type
    else:
        # Default to JSON for other unsupported types
        return json.dumps([output_dict]), "application/json"

In [None]:
# Create a model.tar.gz file and upload to S3
!tar -czvf model.tar.gz resnet50/*
resnet50_model_data = sess.upload_data(
        path="model.tar.gz", bucket=sess.default_bucket(), key_prefix="model/pytorch"
    )

In [None]:
print(resnet50_model_data)

In [None]:
from sagemaker.pytorch.model import PyTorchModel
model = PyTorchModel(
    entry_point="inference.py",
    source_dir="resnet50/code",
    role=role,
    model_data=resnet50_model_data,
    framework_version="1.13.1",
    py_version="py39",
)

In [None]:
PyTorchModel

In [None]:
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer
from sagemaker.base_serializers import DataSerializer

# set local_mode to False if you want to deploy on a remote
# SageMaker instance

local_mode = False

if local_mode:
    instance_type = "local"
else:
    instance_type = "ml.m5.xlarge"

predictor = model.deploy(
    initial_instance_count=1,
    instance_type=instance_type,
    serializer=DataSerializer(content_type='image/x-image'),
    deserializer=JSONDeserializer(),
)

pt_endpoint_name = predictor.endpoint_name
endpoint_cleanup_list.append(pt_endpoint_name)

print(f"Model deployed successfully!")
print(f"Endpoint name: {pt_endpoint_name}")

In [None]:
# Please note that top-5 list is not implemented in the inference.py file.
from IPython.display import display, Markdown
display(Markdown("### Please note that top-5 list is not implemented in the inference.py for simplicity of the lab."))
display_images_with_prediction(pt_endpoint_name, images, jumpstart_flag=False)

# Cleanup

In [None]:
for endpoint_name in endpoint_cleanup_list:
    cleanup_endpoint(endpoint_name)
