## Create and deploy the model

In [63]:
import time
import boto3
import sagemaker
from sagemaker.pytorch import PyTorchModel

# Initialize the SageMaker session, the boto3 SageMaker client, and the
# execution role the model will run under.
sagemaker_session = sagemaker.Session()
sm_client = boto3.client('sagemaker')
role = sagemaker.get_execution_role()

# Model artifact location, model name, and endpoint name.
# model_name carries a Unix-timestamp suffix, so every run of this cell
# registers a differently named model; endpoint_name is fixed.
# NOTE(review): with a fixed endpoint name, re-running this cell while the
# endpoint still exists will presumably fail on creation — confirm, and use
# the "Update the SageMaker Endpoint" section for redeployments.
model_data = 's3://sagemaker-us-east-1-921656639050/tencent-whisper-lora-fine-tuning-2024-09-09-10-24-48/output/model.tar.gz'
model_name = f"whisper-lora-model-{int(time.time())}"
endpoint_name = 'whisper-lora-endpoint'

# Create the PyTorch model: the trained artifact from model_data plus the
# inference entry point taken from source_dir.
# NOTE(review): source_dir is an absolute path on the notebook instance;
# this cell is not portable to other environments as written.
pytorch_model = PyTorchModel(
    model_data=model_data,
    role=role,
    entry_point='inference.py',
    source_dir='/home/ec2-user/SageMaker/tencent_asr',
    framework_version='2.3',
    py_version='py311',
    predictor_cls=sagemaker.predictor.Predictor,
    name=model_name,
    model_server_workers=4
)

# Deploy the model to a single ml.g5.2xlarge instance behind endpoint_name.
pytorch_model.deploy(
    instance_type='ml.g5.2xlarge',
    initial_instance_count=1,
    endpoint_name=endpoint_name
)

# Wait until the endpoint reports it is in service.
# NOTE(review): deploy() appears to block until the endpoint is up already
# (the progress dashes in the cell output), so this wait is likely only a
# safety net — confirm.
waiter = sm_client.get_waiter('endpoint_in_service')
waiter.wait(EndpointName=endpoint_name)
print('model deployed')

INFO:sagemaker:Repacking model artifact (s3://sagemaker-us-east-1-921656639050/tencent-whisper-lora-fine-tuning-2024-09-09-10-24-48/output/model.tar.gz), script artifact (/home/ec2-user/SageMaker/tencent_asr), and dependencies ([]) into single tar.gz file located at s3://sagemaker-us-east-1-921656639050/whisper-lora-model-1725948420/model.tar.gz. This may take some time depending on model size...
INFO:sagemaker:Creating model with name: whisper-lora-model-1725948420
INFO:sagemaker:Creating endpoint-config with name whisper-lora-endpoint
INFO:sagemaker:Creating endpoint with name whisper-lora-endpoint


--------------!model deployed


## Update the SageMaker Endpoint

In [54]:
# Create a new endpoint configuration.
# The config name carries a Unix-timestamp suffix so each run produces a
# fresh, uniquely named configuration. It routes all traffic through a single
# variant ('AllTraffic') that serves model_name on one ml.g5.2xlarge instance.
# Relies on `time`, `sm_client` and `model_name` defined in the deployment
# cell of this notebook.
new_config_name = f"whisper-lora-config-{int(time.time())}"
create_config_response = sm_client.create_endpoint_config(
    EndpointConfigName=new_config_name,
    ProductionVariants=[{
        'InstanceType': 'ml.g5.2xlarge',
        'InitialInstanceCount': 1,
        'ModelName': model_name,
        'VariantName': 'AllTraffic'
    }]
)

In [55]:
# Check whether the endpoint already exists.
# DescribeEndpoint reports a missing endpoint as a ClientError whose error
# code is "ValidationException". Any other ClientError (throttling, access
# denied, expired credentials, ...) is a genuine failure and is re-raised
# instead of being mistaken for "endpoint does not exist", which would
# otherwise send us down the create_endpoint path by accident.
try:
    sm_client.describe_endpoint(EndpointName=endpoint_name)
    endpoint_exists = True
except sm_client.exceptions.ClientError as err:
    error_code = err.response.get('Error', {}).get('Code')
    if error_code != 'ValidationException':
        raise
    endpoint_exists = False

if endpoint_exists:
    # Point the existing endpoint at the new endpoint configuration.
    print(f"Updating existing endpoint: {endpoint_name}")
    sm_client.update_endpoint(
        EndpointName=endpoint_name,
        EndpointConfigName=new_config_name
    )
else:
    # No endpoint with this name yet: create it from the new configuration.
    print(f"Creating new endpoint: {endpoint_name}")
    sm_client.create_endpoint(
        EndpointName=endpoint_name,
        EndpointConfigName=new_config_name
    )

# Block until the endpoint is back in service after the update/creation.
waiter = sm_client.get_waiter('endpoint_in_service')
waiter.wait(EndpointName=endpoint_name)

print(f"Endpoint {endpoint_name} is now updated and in service.")

Updating existing endpoint: whisper-lora-endpoint
Endpoint whisper-lora-endpoint is now updated and in service.


## Test the endpoint with a local query

In [66]:
# 请求，测试环境
import boto3
import json
import librosa
import numpy as np
import io
import time

def prepare_audio(audio_file, target_sr=16000):
    """Load an audio file and return its samples as raw float32 bytes.

    Args:
        audio_file: Path (or file-like object) handed to ``librosa.load``.
        target_sr: Sampling rate passed to ``librosa.load`` as ``sr``
            (default 16000).

    Returns:
        bytes: The loaded samples cast to ``np.float32`` and serialized with
        ``ndarray.tobytes()``; no header or sample-rate metadata is included.
    """
    # The sample rate returned by librosa is not needed by the caller.
    samples, _ = librosa.load(audio_file, sr=target_sr)

    # Cast explicitly so the payload is always float32, then serialize.
    return samples.astype(np.float32).tobytes()

# Path of the audio file to transcribe (replace with your own file).
audio_file_path = './English_04.wav'

# Build the request payload: raw float32 sample bytes.
audio_data = prepare_audio(audio_file_path)

# SageMaker runtime client, used to invoke the deployed endpoint.
runtime = boto3.client('runtime.sagemaker')

# Send the same request 10 times and print the per-request round-trip latency
# (seconds) alongside the parsed response.
for idx in range(10):
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments the way time.time() can.
    begin = time.perf_counter()
    # Send the request to the endpoint.
    response = runtime.invoke_endpoint(
        EndpointName='whisper-lora-endpoint',
        ContentType='application/octet-stream',
        Body=audio_data
    )

    # Parse the JSON response body; reading the body is included in the
    # timed interval.
    result = json.loads(response['Body'].read().decode())
    end = time.perf_counter()
    print(idx, end - begin, result)

0 0.3879268169403076 {'transcription': 'I want to play Sawyer.'}
1 0.3486473560333252 {'transcription': 'I want to play Sawyer.'}
2 0.35468602180480957 {'transcription': 'I want to play Sawyer.'}
3 0.3473360538482666 {'transcription': 'I want to play Sawyer.'}
4 0.34491586685180664 {'transcription': 'I want to play Sawyer.'}
5 0.3428187370300293 {'transcription': 'I want to play Sawyer.'}
6 0.3446674346923828 {'transcription': 'I want to play Sawyer.'}
7 0.3419368267059326 {'transcription': 'I want to play Sawyer.'}
8 0.34696412086486816 {'transcription': 'I want to play Sawyer.'}
9 0.3298470973968506 {'transcription': 'I want to play Sawyer.'}
