# Deploy a model on SageMaker

In [None]:
# Install or upgrade the SageMaker Python SDK for this notebook; --quiet trims
# pip's output and --no-warn-conflicts suppresses dependency-conflict warnings.
%pip install sagemaker --upgrade --quiet --no-warn-conflicts

In [None]:
import json
import sagemaker
import boto3

# Session-wide handles used by the rest of the notebook.
role = sagemaker.get_execution_role()  # execution role for the endpoint
sess = sagemaker.session.Session()  # SageMaker session for interacting with the AWS APIs
bucket = sess.default_bucket()  # bucket to house artifacts
# ``boto_region_name`` is the session's public accessor for its region.
region = sess.boto_region_name  # region name of the current SageMaker session

sm_client = boto3.client("sagemaker")  # client to interact with SageMaker
smr_client = boto3.client("sagemaker-runtime")  # client to interact with SageMaker endpoints
s3_client = boto3.client("s3")  # client for S3 object transfers

# Echo the resolved configuration so a misconfigured environment is obvious early.
print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {bucket}")
print(f"sagemaker session region: {region}")
print(f"sagemaker version: {sagemaker.__version__}")

## Hugging Face container with the default handler

In [None]:
from sagemaker.huggingface import HuggingFaceModel

# Derive a name from the base string "model"; the endpoint shares the model's
# name so the cleanup step can address both with a single identifier.
model_name = sagemaker.utils.name_from_base("model")
endpoint_name = model_name

# Hub model configuration, handed to the container as environment variables.
# Browse available models at https://huggingface.co/models
# (alternative model id: "openai/whisper-large-v2")
hub = dict(
    HF_MODEL_ID="techiaith/whisper-large-v3-ft-verbatim-cy-en",
    HF_TASK="automatic-speech-recognition",
)

# Describe the Hugging Face model: framework versions select the container image.
huggingface_model = HuggingFaceModel(
    role=role,
    env=hub,
    name=model_name,
    py_version="py312",
    pytorch_version="2.6.0",
    transformers_version="4.49.0",
)

# Deploy the model to SageMaker Inference on a single instance.
predictor = huggingface_model.deploy(
    endpoint_name=endpoint_name,
    instance_type="ml.g6e.2xlarge",  # EC2 instance type
    initial_instance_count=1,  # number of instances
)

In [None]:
from sagemaker.jumpstart import utils

# Fetch a sample recording from the JumpStart content bucket.
# NOTE: the automatic speech recognition models require WAV files sampled at
# 16 kHz, so resample the audio if necessary. The input audio file must also be
# shorter than 30 seconds.
s3_bucket = utils.get_jumpstart_content_bucket()
key_prefix = "training-datasets/asr_notebook_data"
input_audio_file_name = "sample1.wav"

# Object key is "<prefix>/<file name>"; the file is saved under the same name locally.
object_key = "/".join((key_prefix, input_audio_file_name))
s3_client.download_file(
    Bucket=s3_bucket,
    Key=object_key,
    Filename=input_audio_file_name,
)

In [None]:
from sagemaker.serializers import DataSerializer
	
# Send the raw audio bytes to the endpoint. The serializer carries the request
# content type ("audio/x-audio"), so nothing else needs to be set on the
# predictor for the payload to be labelled correctly.
predictor.serializer = DataSerializer(content_type="audio/x-audio")

# The file named by ``input_audio_file_name`` must already exist locally.
with open(input_audio_file_name, "rb") as f:
    data = f.read()

# Left as the final expression so the notebook displays the transcription result.
predictor.predict(data)

In [None]:
# Tear down everything this notebook created, in order: the endpoint, its
# endpoint configuration (addressed by the endpoint's name), then the model.
teardown_steps = (
    (sess.delete_endpoint, endpoint_name),
    (sess.delete_endpoint_config, endpoint_name),
    (sess.delete_model, model_name),
)
for delete_resource, resource_name in teardown_steps:
    delete_resource(resource_name)