In [1]:
# Upgrade packages and install libsndfile1
!sudo yum upgrade -y
!sudo yum install libsndfile1 -y



In [2]:
# Install librosa (if you want to process mp3) and upgrade sagemaker
!pip install librosa
!pip install sagemaker --upgrade

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting sagemaker
  Downloading sagemaker-2.131.1.tar.gz (665 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m665.2/665.2 kB[0m [31m40.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: sagemaker
  Building wheel for sagemaker (setup.py) ... [?25ldone
[?25h  Created wheel for sagemaker: filename=sagemaker-2.131.1-py2.py3-none-any.whl size=902451 sha256=099897311376e3a81d6803e35cf90f4e353e0daf74b84ea9486a207c54e02025
  Stored in directory: /home/ec2-user/.cache/pip/wheels/94/de/5c/8fa9f9af1b43acd7ef0e8cf6d302b5f32a341af3aa2fe5013f
Successfully built sagemaker
Installing collected packages: sagemaker
  Attempting uninstall: sagemaker
    Found existing installation: sagemaker 2.128.0
    Uninstalling sagemaker-2.128.0:
      

In [3]:
import json
import time
import boto3
import numpy as np
import sagemaker
import sagemaker.huggingface

#BUCKET="[BUCKET_NAME]" # please use your bucket name
ROLE = sagemaker.get_execution_role()
sess = sagemaker.Session()
BUCKET = sess.default_bucket()
sagemaker
print(f"sagemaker role arn: {ROLE}")
print(f"sagemaker bucket: {BUCKET}")
print(f"sagemaker session region: {sess.boto_region_name}")

sagemaker role arn: arn:aws:iam::348052051973:role/service-role/AmazonSageMakerServiceCatalogProductsExecutionRole
sagemaker bucket: sagemaker-us-east-1-348052051973
sagemaker session region: us-east-1


In [6]:
# The SageMaker Model is a container containing the running environment + inference scripte + model data.
# The SageMaker Endpoint is a running cluster of the SageMaker Models

from sagemaker.huggingface import HuggingFaceModel

# Change model_name (create one) and model_data (copy from the training job S3 model artifact) accordingly
model_name = 'whisper-zhtw6'
model_data = 's3://sagemaker-us-east-1-348052051973/whisper/data/zhtw-common-voice-processed/whisper-zhtw-1675301423/output/whisper-zhtw-1675301423/output/model.tar.gz'

huggingface_model = HuggingFaceModel(
        entry_point = 'inference.py',
        source_dir='./scripts',
        name = model_name,
        transformers_version='4.17.0',
        pytorch_version='1.10.2',
        py_version='py38',
        model_data=model_data,
        role=ROLE,
    )



In [None]:
# The create_model method takes a lot of temporary space under the root. For large models, the root disk in SageMaker Studio Notebook (which is a container) is not enough. 
# This notebook is simply invoking SageMaker APIs, so it can be done on a EC2 as well.

# For creating a new model and deploy as a new endpoint, the easiest way is to call the deploy method under the model

# predictor = huggingface_model.deploy(
#     initial_instance_count=1,
#     instance_type="ml.m5d.2xlarge", 
#     endpoint_name = model_name,
# )

# If you want to update an existing endpoint with a new model, you 
from sagemaker.predictor import Predictor
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

sess.create_model(
    model_name,
    ROLE,
    huggingface_model.prepare_container_def(
        instance_type='ml.m5d.2xlarge'
    )
)

predictor = Predictor('whisper-zhtw', serializer=JSONSerializer(), deserializer=JSONDeserializer())
predictor.update_endpoint(model_name=model_name, initial_instance_count=1, instance_type='ml.m5d.2xlarge')

----

In [8]:
%%time
# Do the prediction
from sagemaker.predictor import Predictor
from sagemaker.serializers import DataSerializer

audio_path = 'audio2.flac'
audio_serializer = DataSerializer(content_type='audio/x-audio')

predictor = Predictor('whisper-zhtw', serializer=audio_serializer)
with open(audio_path, "rb") as data_file:
    audio_data = data_file.read()
    
prediction = predictor.predict(audio_data)
print(prediction)

b'"\\u9019\\u9593\\u70ba\\u8005\\u9023\\u53bb\\u706b\\u5340\\u7e3d\\u5171\\u767c\\u751f\\u5169\\u5c64\\u6bb5\\u6027"'
CPU times: user 90.1 ms, sys: 16.1 ms, total: 106 ms
Wall time: 9.13 s


In [9]:
prediction.decode('unicode_escape')

'"這間為者連去火區總共發生兩層段性"'