In [15]:
%%capture
!pip install -U sagemaker librosa

In [5]:
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel

# Bind the SageMaker session to an explicit region rather than the ambient default.
boto_session = boto3.Session(region_name="us-east-1")
session = sagemaker.Session(boto_session=boto_session)

# Prefer the execution role reported by the SDK; if that lookup raises
# ValueError, resolve the role ARN by its name through IAM instead.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client("iam")
    role_description = iam.get_role(RoleName="sagemaker_execution_role")
    role = role_description["Role"]["Arn"]
print(role)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
arn:aws:iam::582575198414:role/service-role/AmazonSageMaker-ExecutionRole-20241216T115527


In [5]:
# Settings handed to HuggingFaceModel through `env`: the Hub model id and the task.
# Model catalogue: https://huggingface.co/models
hub = dict(
    HF_MODEL_ID="openai/whisper-large-v3",
    HF_TASK="automatic-speech-recognition",
)

# Describe the Hugging Face model (framework versions, env, role, session).
huggingface_model = HuggingFaceModel(
    role=role,
    env=hub,
    sagemaker_session=session,
    transformers_version="4.49.0",
    pytorch_version="2.6.0",
    py_version="py312",
)

In [7]:
# Instance type used for the endpoint deployment and for the price lookup.
# Only one assignment is kept: the earlier ones were dead stores that were
# overwritten immediately. Alternatives that were listed here:
#   'ml.g5.12xlarge', 'ml.g5.xlarge'
instance_type = 'ml.g6.xlarge'

In [7]:
# Deploy the model to SageMaker Inference: one instance of `instance_type`.
predictor = huggingface_model.deploy(
    instance_type=instance_type,
    initial_instance_count=1,
)

-----------!

In [12]:
# Optional alternative to the deploy() call: build a Predictor from an endpoint
# name. Left commented out; uncomment and adjust endpoint_name to use it.
# from sagemaker.predictor import Predictor
# predictor = Predictor(
#     endpoint_name="huggingface-pytorch-inference-2025-10-07-07-54-46-074",
#     sagemaker_session=session
# )

In [13]:
from sagemaker.deserializers import JSONDeserializer
from sagemaker.serializers import DataSerializer

# Requests are sent as raw bytes tagged 'audio/x-audio'; responses are parsed as JSON.
predictor.deserializer = JSONDeserializer()
predictor.serializer = DataSerializer(content_type="audio/x-audio")

In [14]:
# Make sure the input file "mlk.flac" exists in the working directory.
# The file is read as raw bytes and sent to the endpoint unchanged.
with open("mlk.flac", "rb") as f:
	data = f.read()
predictor.predict(data)

{'text': ' I have a dream that one day this nation will rise up and live out the true meaning of its creed.'}

In [2]:
import librosa
# Length of the sample clip as reported by librosa (presumably seconds — confirm
# against the librosa docs); used later to turn request latency into cost per audio second.
duration = librosa.get_duration(path="mlk.flac")
duration

13.0

In [15]:
import time

# Number of back-to-back requests to average over.
iters = 10

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
for i in range(iters):
    predictor.predict(data)
end = time.perf_counter()

# Mean wall-clock seconds per predict() call, as observed by this client.
transcription_time = (end - start) / iters
transcription_time

1.7723254919052125

In [8]:
# Client for the AWS Price List query API.
pricing = boto3.client('pricing', region_name='us-east-1')

# Look up the SageMaker ML-instance price for `instance_type`.
# The location must name the region the endpoint is deployed in. The session
# is created for us-east-1, so the matching location is
# 'US East (N. Virginia)' (it was 'US West (Oregon)', i.e. a different region).
response = pricing.get_products(
    ServiceCode='AmazonSageMaker',
    Filters=[
        {'Type': 'TERM_MATCH', 'Field': 'instanceType', 'Value': instance_type},
        {'Type': 'TERM_MATCH', 'Field': 'productFamily', 'Value': 'ML Instance'},
        {'Type': 'TERM_MATCH', 'Field': 'location', 'Value': 'US East (N. Virginia)'}
    ]
)

In [9]:
import json

# Fail with a clear message when the filters matched nothing, instead of an
# opaque IndexError on [0]. LookupError is the base class of IndexError.
price_list = response['PriceList']
if not price_list:
    raise LookupError(
        f"No SageMaker pricing entries returned for instance type {instance_type!r}"
    )

# Use a dedicated name here: rebinding `data` would overwrite the audio bytes
# loaded for the prediction cells and break any re-run of them.
# NOTE(review): only the first returned product is used — confirm that the
# filters narrow the result to the intended (hosting) price.
product = json.loads(price_list[0])
on_demand = product['terms']['OnDemand']
first_term = next(iter(on_demand.values()))
first_dimension = next(iter(first_term['priceDimensions'].values()))

# On-demand price in USD per pricing unit (presumably per instance-hour — verify).
price = float(first_dimension['pricePerUnit']['USD'])
price

1.127

In [16]:
# Seconds of audio one instance gets through per hour when requests are issued
# one at a time, as in the timing loop:
#   (requests per hour) * (audio seconds per request)
audio_seconds_per_hour = 3600.0 / transcription_time * duration

# Hourly instance price spread over that throughput = USD per second of audio.
price_to_transcribe_1_sec = price / audio_seconds_per_hour

# Keep the original (misspelled) name bound in case other cells still use it.
price_to_transcrivbe_1_sec = price_to_transcribe_1_sec
price_to_transcribe_1_sec

4.2679718576435357e-05

In [17]:
# Tear down the resources created for this benchmark.
# NOTE(review): the call order is kept as-is — presumably delete_model() needs
# the endpoint's configuration to still exist; confirm against the SageMaker SDK docs.
predictor.delete_model()
predictor.delete_endpoint()