# Deploy a model on SageMaker

In [1]:
# Upgrade the SageMaker Python SDK in the kernel's environment (quiet, without dependency-conflict warnings).
# NOTE(review): the version is not pinned, so re-runs may pick up a newer SDK than the one shown in the outputs.
%pip install sagemaker --upgrade --quiet --no-warn-conflicts

Note: you may need to restart the kernel to use updated packages.


In [2]:
import json
import sagemaker
import boto3

role = sagemaker.get_execution_role()  # execution role for the endpoint
sess = sagemaker.session.Session()  # SageMaker session for interacting with different AWS APIs
bucket = sess.default_bucket()  # bucket to house artifacts
# Read the region through the public `boto_region_name` property instead of the
# private `_region_name` attribute, matching what is printed below.
region = sess.boto_region_name

sm_client = boto3.client("sagemaker")  # client to interact with SageMaker
smr_client = boto3.client("sagemaker-runtime")  # client to interact with SageMaker endpoints
s3_client = boto3.client("s3")  # client for S3 object downloads

# Echo the resolved environment, reusing the values computed above.
print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {bucket}")
print(f"sagemaker session region: {region}")
print(f"sagemaker version: {sagemaker.__version__}")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
sagemaker role arn: arn:aws:iam::736221153822:role/SageMaker-ServiceRole-Default
sagemaker bucket: sagemaker-us-east-1-736221153822
sagemaker session region: us-east-1
sagemaker version: 2.246.0


## HF container with default handler

In [13]:
from sagemaker.huggingface import HuggingFaceModel

# Derive a name from the base string "model" and reuse it for the endpoint.
model_name = sagemaker.utils.name_from_base("model")
endpoint_name = model_name

# Hub model configuration (browse models at https://huggingface.co/models).
hub = {
    "HF_MODEL_ID": "openai/whisper-large-v2",
    "HF_TASK": "automatic-speech-recognition",
}

# Describe the Hugging Face model: the Hub settings are passed as `env`,
# together with the framework and Python versions to use.
huggingface_model = HuggingFaceModel(
    role=role,
    name=model_name,
    env=hub,
    transformers_version="4.49.0",
    pytorch_version="2.6.0",
    py_version="py312",
)

# Deploy the model to SageMaker Inference and keep the returned predictor.
predictor = huggingface_model.deploy(
    endpoint_name=endpoint_name,
    instance_type="ml.g6e.2xlarge",  # EC2 instance type
    initial_instance_count=1,  # number of instances
)

-------------!

In [24]:
from sagemaker.jumpstart import utils

# The wav files must be sampled at 16 kHz (required by the automatic speech recognition
# models), so resample them if needed. Each input audio file must be shorter than 30 seconds.
s3_bucket = utils.get_jumpstart_content_bucket()
key_prefix = "training-datasets/asr_notebook_data"

# Fetch both samples into the working directory under their own file names.
# `input_audio_file_name` is used as the loop variable on purpose: after the loop it
# still holds "sample_french1.wav", the same value the name had after the second download.
for input_audio_file_name in ("sample1.wav", "sample_french1.wav"):
    s3_client.download_file(s3_bucket, f"{key_prefix}/{input_audio_file_name }", input_audio_file_name)

In [15]:
from sagemaker.serializers import DataSerializer
	
# Send raw audio bytes to the endpoint with the "audio/x-audio" content type.
predictor.serializer = DataSerializer(content_type='audio/x-audio')
# NOTE(review): presumably redundant with the serializer's content type set above — confirm and drop.
predictor.content_type = "audio/x-audio"

# Name the input file explicitly. The download cell leaves `input_audio_file_name`
# set to "sample_french1.wav", so relying on that leftover value makes a top-to-bottom
# run transcribe the French clip instead of the English one this cell is meant to show.
input_audio_file_name = "sample1.wav"

# The file must already exist locally (it is fetched by the download cell).
with open(input_audio_file_name, "rb") as f:
    data = f.read()
predictor.predict(data)

{'text': " We are living in very exciting times with machine learning. The speed of ML model development will really actually increase, but you won't get to that end state that we want in the next coming years unless we actually make these models more accessible to everybody."}

In [None]:
# Tear down, in order: the endpoint, its endpoint config (same name), then the model.
for teardown, resource_name in (
    (sess.delete_endpoint, endpoint_name),
    (sess.delete_endpoint_config, endpoint_name),
    (sess.delete_model, model_name),
):
    teardown(resource_name)

## HF container with custom handler

The model is deployed from the Hugging Face Hub, while the custom inference handler is placed on S3.

In [33]:
# SageMaker endpoint sizing and startup settings.
instance_type = "ml.g6e.4xlarge"
health_check_timeout = 900  # forwarded to deploy() as container_startup_health_check_timeout

# OVERRIDE: use an explicitly specified Hugging Face PyTorch inference image for the current region.
image_uri = f"763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-inference:2.6.0-transformers4.49.0-gpu-py312-cu124-ubuntu22.04"

# One name derived from the base string "model", shared by the model and its endpoint.
model_name = sagemaker.utils.name_from_base("model")
endpoint_name = model_name

model = sagemaker.Model(
    image_uri=image_uri,
    name=model_name,
    role=role,
    sagemaker_session=sess,
    # Uncompressed S3 prefix attached as the model data source.
    model_data={
        "S3DataSource": {
            "S3Uri": "s3://dsoldat-ml/models/openai/whisper-large-v3-turbo/",
            "S3DataType": "S3Prefix",
            "CompressionType": "None",
        }
    },
    # NOTE(review): the S3 prefix is named "whisper-large-v3-turbo" but HF_MODEL_ID is
    # "openai/whisper-large-v3" — confirm which model is intended.
    env={"HF_MODEL_ID": "openai/whisper-large-v3"},
)

# Deploy the model to an endpoint.
model.deploy(
    endpoint_name=endpoint_name,
    instance_type=instance_type,
    initial_instance_count=1,
    container_startup_health_check_timeout=health_check_timeout,
)

-----------!

In [34]:
def query_endpoint(body, content_type):
    """Invoke the endpoint named by the global ``endpoint_name`` and pretty-print its JSON reply.

    Args:
        body: Request payload, passed as ``Body`` to ``invoke_endpoint``.
        content_type: MIME type of ``body``, passed as ``ContentType``.

    Returns:
        None. The decoded response is printed as JSON with two-space indentation.
    """
    raw_reply = smr_client.invoke_endpoint(
        EndpointName=endpoint_name,
        ContentType=content_type,
        Body=body,
    )["Body"].read()
    print(json.dumps(json.loads(raw_reply), indent=2))

In [37]:
# Send the first sample to the endpoint as raw WAV bytes.
input_audio_file_name = "sample1.wav"

with open(input_audio_file_name, mode="rb") as audio_fh:
    wav_file_read = audio_fh.read()

query_endpoint(body=wav_file_read, content_type="audio/wav")

{
  "text": [
    " We are living in very exciting times with machine learning. The speed of ML model development will really actually increase. But you won't get to that end state that we want in the next coming years unless we actually make these models more accessible to everybody."
  ]
}


In [38]:
# Send the French sample as a JSON request: hex-encoded audio plus language and task fields.
input_audio_file_name = "sample_french1.wav"

with open(input_audio_file_name, mode="rb") as audio_fh:
    wav_file_read = audio_fh.read()

payload = {
    "audio_input": wav_file_read.hex(),
    "language": "french",
    "task": "translate",
}

query_endpoint(body=json.dumps(payload).encode("utf-8"), content_type="application/json")

{
  "text": [
    " Welcome to JPB Syst\u00e8mes. Here, we have more than 150 employees, more than 90% of the turnover at export, and we have developed about 15 products."
  ]
}


In [39]:
# Tear down, in order: the endpoint, its endpoint config (same name), then the model.
for teardown, resource_name in (
    (sess.delete_endpoint, endpoint_name),
    (sess.delete_endpoint_config, endpoint_name),
    (sess.delete_model, model_name),
):
    teardown(resource_name)