In [3]:
!pip install librosa
!pip install sagemaker --upgrade

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com


In [4]:
import json
import time
import boto3
import numpy as np
import sagemaker
import sagemaker.huggingface

#BUCKET="[BUCKET_NAME]" # please use your bucket name
ROLE = sagemaker.get_execution_role()
sess = sagemaker.Session()
BUCKET = sess.default_bucket()
print(f"sagemaker role arn: {ROLE}")
print(f"sagemaker bucket: {BUCKET}")
print(f"sagemaker session region: {sess.boto_region_name}")

sagemaker role arn: arn:aws:iam::348052051973:role/service-role/AmazonSageMakerServiceCatalogProductsExecutionRole
sagemaker bucket: sagemaker-us-east-1-348052051973
sagemaker session region: us-east-1


In [35]:
from sagemaker.huggingface import HuggingFaceModel

model_name = 'whisper-zhcn6'
model_data = 's3://sagemaker-us-east-1-348052051973/whisper-zhcn/whisper-zhcn-1672987430/output/whisper-zhcn-1672987430/output/model.tar.gz'
instance_type = 'ml.m5d.4xlarge'
huggingface_model = HuggingFaceModel(
        entry_point = 'inference2.py',
        source_dir='./scripts',
        name = model_name,
        transformers_version='4.17.0',
        pytorch_version='1.10.2',
        py_version='py38',
        model_data=model_data,
        role=ROLE,
    )

In [36]:
# predictor = huggingface_model.deploy(
#     initial_instance_count=1,
#     instance_type="ml.m5d.4xlarge", 
#     endpoint_name = model_name,
# )

In [37]:
# first time deploy as endpoint
# predictor = huggingface_model.deploy(
#     initial_instance_count=1,
#     instance_type="ml.m5d.2xlarge", 
#     endpoint_name = model_name,
# )

# update endpoint
from sagemaker.predictor import Predictor
from sagemaker.serializers import DataSerializer
from sagemaker.deserializers import JSONDeserializer

sess.create_model(
    model_name,
    ROLE,
    huggingface_model.prepare_container_def(
        instance_type=instance_type
    )
)
audio_serializer = DataSerializer(content_type='audio/x-audio')


predictor = Predictor('whisper-zhcn', serializer=audio_serializer, deserializer=JSONDeserializer())
predictor.update_endpoint(model_name=model_name, initial_instance_count=1, instance_type=instance_type)

--------!

In [38]:
# # inference audio file that download from S3 bucket or inference local audio file 
# import soundfile
# import IPython.display as ipd
# import numpy as np

# # s3.download_file(BUCKET, 'huggingface-blog/sample_audio/xxxxxx.wav', 'downloaded.wav')
# # file_name ='downloaded.wav'

# # download a sample audio file by using below link
# !wget https://datashare.ed.ac.uk/bitstream/handle/10283/343/MKH800_19_0001.wav
    
# file_name ='MKH800_19_0001.wav'

# speech_array, sampling_rate = soundfile.read(file_name)

# ipd.Audio(data=np.asarray(speech_array), autoplay=False, rate=16000)

In [63]:
%%time
from sagemaker.predictor import Predictor
from sagemaker.serializers import DataSerializer

audio_path = 'audio2.flac'
audio_serializer = DataSerializer(content_type='audio/x-audio')

predictor = Predictor('whisper-zhcn', serializer=audio_serializer)
with open(audio_path, "rb") as data_file:
    audio_data = data_file.read()
    
prediction = predictor.predict(audio_data)
print(prediction)

b'"\\u9019\\u4ef6\\u8b2a\\u8005\\uff0c\\u539f\\u53bb\\u706b\\u72ac\\u4e2d\\u5171\\u65b9\\u5f0f\\u8fb2\\u7ce7\\u4e2d\\u5c08\\u7dda\\ufffd"'
CPU times: user 88.5 ms, sys: 3.34 ms, total: 91.8 ms
Wall time: 8.11 s


In [64]:
prediction.decode('unicode_escape')

'"這件謪者，原去火犬中共方式農糧中專線�"'

In [62]:
import soundfile as sf
import librosa
import numpy as np

audio_path = 'audio2.wav'

data, sample_rate = librosa.load(audio_path, sr=16000)
sf.write('audio2.flac', data, samplerate=16000)