# [모듈 2.2] SageMaker 엔드포인트에 단일 모델을 Triton으로 배포

# 1. 환경 셋업

## 1.1. 기본 세팅
`autoreload` 확장을 사용하여, 수정된 패키지를 코드 실행 전에 자동으로 다시 로딩합니다.

In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('./src')

이전 노트북에서 `%store`로 저장해 둔 훈련 아티팩트 관련 변수(모델 서빙 폴더, 모델 이름, 버킷)를 가져옵니다.

In [2]:
%store -r model_serving_folder
%store -r model_name
%store -r bucket

In [3]:
# SageMaker SDK session and the IAM execution role; the role is passed to the
# model definitions created later in this notebook.
import sagemaker

sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()

# NOTE(review): `sagemaker` is imported a second time on the next line;
# harmless, but redundant.
import boto3, json, sagemaker, time
import numpy as np
# Low-level SageMaker API client, used for the cloud deployment steps.
sm_client = boto3.client(service_name="sagemaker")

## 1.2. 변수 설정

In [4]:
# Common prefix for SageMaker resource names; also used as the S3 key prefix.
prefix = "triton-ncf"

# UTC timestamp suffix so that every run gets distinct resource names.
ts = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
# endpoint variables
sm_model_name = f"{prefix}-mdl-{ts}"
endpoint_config_name = f"{prefix}-epc-{ts}"
endpoint_name = f"{prefix}-ep-{ts}"
# `bucket` is restored from the previous notebook via %store.
model_data_url = f"s3://{bucket}/{prefix}/"
# "local_gpu" is the SageMaker SDK local-mode instance type for GPU hosts.
instance_type = "local_gpu"

In [5]:
print("sm_model_name: \n", sm_model_name)
print("endpoint_config_name: \n", endpoint_config_name)
print("endpoint_name: \n", endpoint_name)

sm_model_name: 
 triton-ncf-mdl-2022-12-11-07-57-19
endpoint_config_name: 
 triton-ncf-epc-2022-12-11-07-57-19
endpoint_name: 
 triton-ncf-ep-2022-12-11-07-57-19


## 1.3. Triton Docker Image 결정

In [6]:
from triton_util import account_id_map
# Region configured for the default boto3 session.
region = boto3.Session().region_name

# China regions ("cn-*") use a different DNS suffix than all other regions.
if region.startswith("cn-"):
    base = "amazonaws.com.cn"
else:
    base = "amazonaws.com"

# ECR URI of the SageMaker Triton image; the registry account id is looked up
# per region in the project's `account_id_map`.
mme_triton_image_uri = "{account_id}.dkr.ecr.{region}.{base}/sagemaker-tritonserver:22.07-py3".format(
    region=region,
    base=base,
    account_id=account_id_map[region],
)
print("mme_triton_image_uri: \n", mme_triton_image_uri)

mme_triton_image_uri: 
 785573368785.dkr.ecr.us-east-1.amazonaws.com/sagemaker-tritonserver:22.07-py3


In [7]:
model_serving_folder

'triton-docker-serve-pt'

# 2. 모델 패키징 (model.tar.gz) 및 S3 업로딩



In [8]:
import os
from triton_util import tar_artifact, upload_tar_s3

    
# Package the model serving folder into a tarball and upload it to S3.
# NOTE(review): tar_artifact / upload_tar_s3 are project helpers from
# triton_util; their behavior is inferred from their names and the recorded
# cell output — confirm against src/triton_util.py.
model_tar_file = tar_artifact(model_serving_folder, model_name)    
print("model_tar_file: ", model_tar_file)
# The returned S3 URI is used as the model data location for both the local
# and the cloud deployment.
model_uri_pt = upload_tar_s3(sagemaker_session, model_tar_file, prefix)
print("model_uri_pt: ", model_uri_pt)

drwxrwxr-x ec2-user/ec2-user 0 2022-12-11 06:59 ncf_food_model/
-rw-rw-r-- ec2-user/ec2-user 306 2022-12-11 07:52 ncf_food_model/config.pbtxt
drwxrwxr-x ec2-user/ec2-user   0 2022-12-11 06:59 ncf_food_model/1/
-rw-rw-r-- ec2-user/ec2-user 6440569 2022-12-11 07:52 ncf_food_model/1/model.pt
model_tar_file:  ncf_food_model.model.tar.gz
model_uri_pt:  s3://sagemaker-us-east-1-057716757052/triton-ncf/ncf_food_model.model.tar.gz


# 3. 로컬 모드 설정
- Triton 서버가 구동될 때 내부적으로 아래 URL의 `serve` 스크립트가 실행됩니다.
    - 이 스크립트가 참조하는 환경 변수를 컨테이너에 설정해 줍니다.
    - https://raw.githubusercontent.com/triton-inference-server/server/main/docker/sagemaker/serve

In [9]:
from sagemaker.model import Model

In [10]:
# Environment variables handed to the Triton container.
# NOTE(review): these look like the logging switches read by the container's
# SageMaker `serve` entrypoint script (verbose level 3; info, warning and
# error logging enabled) — confirm against that script.
container_envs = {
                    "SAGEMAKER_TRITON_LOG_VERBOSE": "3",
                    "SAGEMAKER_TRITON_LOG_INFO": "1",
                    "SAGEMAKER_TRITON_LOG_WARNING" : "1",
                    "SAGEMAKER_TRITON_LOG_ERROR" : "1"
                 }

# SageMaker SDK Model pairing the uploaded model tarball with the Triton image.
local_pytorch_model = Model(model_data= model_uri_pt,
                            image_uri = mme_triton_image_uri,
                            role=role,
                            env = container_envs
                            )


In [11]:


# Deploy the model in SageMaker local mode (`instance_type` was set to
# "local_gpu" earlier) and wait for the deployment to finish (wait=True).
# NOTE(review): Model.deploy only returns a predictor when `predictor_cls` is
# set, so `local_predictor` is presumably None here; the endpoint is invoked
# by name through a runtime client instead — confirm.
local_predictor = local_pytorch_model.deploy(
                           instance_type=instance_type, 
                           initial_instance_count=1, 
                           endpoint_name=endpoint_name,
                           wait=True,
                           log = False,
                        )

https://docs.docker.com/engine/reference/commandline/login/#credentials-store



Login Succeeded
Attaching to holdgwluib-algo-1-5toty
[36mholdgwluib-algo-1-5toty |[0m 
[36mholdgwluib-algo-1-5toty |[0m == Triton Inference Server ==
[36mholdgwluib-algo-1-5toty |[0m 
[36mholdgwluib-algo-1-5toty |[0m NVIDIA Release 22.07 (build <unknown>)
[36mholdgwluib-algo-1-5toty |[0m Triton Server Version 2.24.0
[36mholdgwluib-algo-1-5toty |[0m 
[36mholdgwluib-algo-1-5toty |[0m Copyright (c) 2018-2022, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
[36mholdgwluib-algo-1-5toty |[0m 
[36mholdgwluib-algo-1-5toty |[0m Various files include modifications (c) NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
[36mholdgwluib-algo-1-5toty |[0m 
[36mholdgwluib-algo-1-5toty |[0m This container image and its contents are governed by the NVIDIA Deep Learning Container License.
[36mholdgwluib-algo-1-5toty |[0m By pulling and using the container, you accept the terms and conditions of this license:
[36mholdgwluib-algo-1-5toty |[0m https://developer.nvidia.co

# 4. 추론 테스트

In [13]:
def create_sample_payload(user_id=0, num_items=100, max_item_id=1000):
    """Build a sample JSON inference request with two INT32 input tensors.

    The request contains a single batch row. ``INPUT__0`` repeats ``user_id``
    and ``INPUT__1`` holds randomly drawn item ids. Called without arguments
    it produces the same request layout as before: two ``[1, 100]`` tensors,
    user id 0, item ids in ``[1, 1000)``.

    Args:
        user_id: Value repeated across the row of ``INPUT__0``.
        num_items: Number of entries per row.
            NOTE(review): the deployed model's config presumably fixes the
            accepted row length — confirm before passing anything but 100.
        max_item_id: Exclusive upper bound of the random item ids; the lower
            bound is 1.

    Returns:
        dict: JSON-serializable payload of the form
        ``{"inputs": [{"name", "shape", "datatype", "data"}, ...]}``.
    """
    shape = (1, num_items)

    # user
    user_np = np.full(shape, user_id, dtype=np.int32)
    # item
    item_np = np.random.randint(low=1, high=max_item_id, size=shape).astype(np.int32)

    def _as_input(name, array):
        # Derive "shape" from the array itself so it can never disagree with
        # the nested "data" list.
        return {
            "name": name,
            "shape": list(array.shape),
            "datatype": "INT32",
            "data": array.tolist(),
        }

    payload = {
        "inputs": [
            _as_input("INPUT__0", user_np),
            _as_input("INPUT__1", item_np),
        ]
    }

    return payload

payload = create_sample_payload()
print("payload: ", payload)

payload:  {'inputs': [{'name': 'INPUT__0', 'shape': [1, 100], 'datatype': 'INT32', 'data': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]}, {'name': 'INPUT__1', 'shape': [1, 100], 'datatype': 'INT32', 'data': [[997, 48, 165, 243, 762, 44, 590, 402, 982, 80, 731, 720, 205, 498, 257, 988, 344, 267, 761, 668, 452, 654, 142, 766, 1, 281, 232, 168, 602, 455, 431, 734, 261, 125, 476, 958, 312, 434, 742, 695, 861, 672, 630, 685, 107, 755, 162, 223, 299, 250, 585, 438, 184, 661, 594, 458, 378, 2, 345, 570, 913, 925, 456, 951, 374, 247, 470, 594, 187, 894, 658, 144, 453, 817, 277, 402, 386, 36, 204, 932, 740, 423, 193, 689, 834, 926, 843, 380, 971, 798, 550, 437, 520, 40, 501, 676, 304, 508, 243, 12]]}]}


In [14]:
def single_model_invoke_endpoint(client, endpoint_name, payload):
    """Send `payload` as JSON to an endpoint and return the decoded JSON reply.

    Args:
        client: Runtime client exposing ``invoke_endpoint(EndpointName=...,
            ContentType=..., Body=...)``; its response must carry a readable
            byte stream under the ``"Body"`` key.
        endpoint_name: Name of the endpoint to invoke.
        payload: JSON-serializable request object.

    Returns:
        The response body parsed from UTF-8 encoded JSON.
    """
    request_body = json.dumps(payload)

    raw_reply = client.invoke_endpoint(
        EndpointName=endpoint_name,
        ContentType="application/octet-stream",
        Body=request_body,
    )["Body"].read()

    return json.loads(raw_reply.decode("utf8"))

# Runtime client for SageMaker local mode; used to invoke the locally
# deployed endpoint with the sample payload.
runtime_client = sagemaker.local.LocalSagemakerRuntimeClient()    
result = single_model_invoke_endpoint(runtime_client,endpoint_name, payload)
print("result : ", result)

[36mholdgwluib-algo-1-5toty |[0m I1211 07:57:53.768023 91 sagemaker_server.cc:190] SageMaker request: 2 /invocations
[36mholdgwluib-algo-1-5toty |[0m I1211 07:57:53.768123 91 model_repository_manager.cc:773] GetModel() 'ncf_food_model' version -1
[36mholdgwluib-algo-1-5toty |[0m I1211 07:57:53.768151 91 model_repository_manager.cc:773] GetModel() 'ncf_food_model' version -1
[36mholdgwluib-algo-1-5toty |[0m I1211 07:57:53.769230 91 infer_request.cc:713] [request id: <id_unknown>] prepared: [0x0x7f5bd4004880] request id: , model: ncf_food_model, requested version: -1, actual version: 1, flags: 0x0, correlation id: 0, batch size: 1, priority: 0, timeout (us): 0
[36mholdgwluib-algo-1-5toty |[0m original inputs:
[36mholdgwluib-algo-1-5toty |[0m [0x0x7f5bd4015278] input: INPUT__1, type: INT32, original shape: [1,100], batch + shape: [1,100], shape: [100]
[36mholdgwluib-algo-1-5toty |[0m [0x0x7f5bd4014e58] input: INPUT__0, type: INT32, original shape: [1,100], batch + shape: [1,

# 5. 로컬 엔드포인트 삭제

In [15]:
from inference_utils import delete_endpoint

In [16]:
# Local-mode SageMaker client, handed to the project's delete_endpoint helper.
# NOTE(review): per the recorded cell output the helper removes the model, the
# endpoint config and the endpoint — confirm in src/inference_utils.py.
client = sagemaker.local.LocalSagemakerClient()
delete_endpoint(client, endpoint_name)

Gracefully stopping... (press Ctrl+C again to force)
--- Deleted model: sagemaker-tritonserver-2022-12-11-07-57-32-571
--- Deleted endpoint_config: triton-ncf-ep-2022-12-11-07-57-19
--- Deleted endpoint: triton-ncf-ep-2022-12-11-07-57-19


# 6. 클라우드 배포

## 6.1. 변수 및 컨테이너 설정

In [17]:
# Resource names for the cloud deployment. They are built from the same
# `prefix` and `ts` as the names used for the local run, so the values are
# identical to those.
sm_model_name = f"{prefix}-mdl-{ts}"
real_endpoint_config_name = f"{prefix}-epc-{ts}"
real_endpoint_name = f"{prefix}-ep-{ts}"

In [18]:
# Container definition: the Triton image plus the S3 location of the model
# tarball. Passed as PrimaryContainer when the SageMaker model is created.
container = {"Image": mme_triton_image_uri,
             "ModelDataUrl": model_uri_pt}


In [19]:
print("container: ", container)
print("sm_model_name: ", sm_model_name)

container:  {'Image': '785573368785.dkr.ecr.us-east-1.amazonaws.com/sagemaker-tritonserver:22.07-py3', 'ModelDataUrl': 's3://sagemaker-us-east-1-057716757052/triton-ncf/ncf_food_model.model.tar.gz'}
sm_model_name:  triton-ncf-mdl-2022-12-11-07-57-19


## 6.2. 세이지메이커 모델, 엔드포인트 컨피그, 엔드포인트 생성

In [20]:
# Register the SageMaker model: execution role plus the Triton container
# definition built above.
create_model_response = sm_client.create_model(
    ModelName=sm_model_name, ExecutionRoleArn=role, PrimaryContainer=container
)

print("Model Arn: " + create_model_response["ModelArn"])

Model Arn: arn:aws:sagemaker:us-east-1:057716757052:model/triton-ncf-mdl-2022-12-11-07-57-19


In [21]:
# Endpoint configuration with a single production variant: one
# ml.g4dn.4xlarge instance serving the model registered above.
create_endpoint_config_response = sm_client.create_endpoint_config(
    EndpointConfigName=real_endpoint_config_name,
    ProductionVariants=[
        {
            "InstanceType": "ml.g4dn.4xlarge",
            "InitialVariantWeight": 1,
            "InitialInstanceCount": 1,
            "ModelName": sm_model_name,
            "VariantName": "AllTraffic",
        }
    ],
)

print("Endpoint Config Arn: " + create_endpoint_config_response["EndpointConfigArn"])

Endpoint Config Arn: arn:aws:sagemaker:us-east-1:057716757052:endpoint-config/triton-ncf-epc-2022-12-11-07-57-19


In [22]:
# Start creating the endpoint from the endpoint configuration. The status is
# polled in a separate cell, which suggests the call returns before the
# endpoint is ready.
create_endpoint_response = sm_client.create_endpoint(
    EndpointName=real_endpoint_name, EndpointConfigName= real_endpoint_config_name
)

print("Endpoint Arn: " + create_endpoint_response["EndpointArn"])

Endpoint Arn: arn:aws:sagemaker:us-east-1:057716757052:endpoint/triton-ncf-ep-2022-12-11-07-57-19


In [23]:
%%time 

# Poll the endpoint once a minute until it leaves the "Creating" state.
resp = sm_client.describe_endpoint(EndpointName=real_endpoint_name)
status = resp["EndpointStatus"]
print("Status: " + status)

while status == "Creating":
    time.sleep(60)
    resp = sm_client.describe_endpoint(EndpointName=real_endpoint_name)
    status = resp["EndpointStatus"]
    print("Status: " + status)

print("Arn: " + resp["EndpointArn"])
print("Status: " + status)

# The loop also ends when creation fails. Surface the reason reported by
# SageMaker here, rather than leaving it to a later invocation to fail with a
# less specific error.
if status != "InService":
    print("FailureReason: " + str(resp.get("FailureReason", "not reported")))

Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: InService
Arn: arn:aws:sagemaker:us-east-1:057716757052:endpoint/triton-ncf-ep-2022-12-11-07-57-19
Status: InService
CPU times: user 76 ms, sys: 9.44 ms, total: 85.4 ms
Wall time: 5min


## 6.3. 추론

In [24]:
# Switch to the real SageMaker runtime client and invoke the cloud endpoint
# with the same sample payload that was used for the local test.
runtime_client = boto3.Session().client('sagemaker-runtime')
single_model_invoke_endpoint(runtime_client,real_endpoint_name, payload)


{'model_name': 'ncf_food_model',
 'model_version': '1',
 'outputs': [{'name': 'OUTPUT__0',
   'datatype': 'FP32',
   'shape': [1, 100, 1],
   'data': [-4.839515209197998,
    1.4735954999923706,
    -2.369269847869873,
    0.5929858088493347,
    -1.3574788570404053,
    2.9009170532226562,
    -4.5265092849731445,
    -3.1178770065307617,
    -1.8846498727798462,
    -2.0428261756896973,
    -1.0859817266464233,
    -0.7866544723510742,
    -0.5726983547210693,
    -3.2671961784362793,
    -2.586115837097168,
    -1.6596626043319702,
    -3.8252816200256348,
    -2.306828022003174,
    -3.1906089782714844,
    -3.2827816009521484,
    -0.7897400856018066,
    -2.908857822418213,
    -1.0965995788574219,
    0.8126152753829956,
    2.704719066619873,
    -1.0788019895553589,
    -3.399214267730713,
    1.44561767578125,
    -1.383210301399231,
    -1.9774476289749146,
    -1.0886330604553223,
    -2.0051159858703613,
    -3.70497989654541,
    -2.6708216667175293,
    -4.68700218200683

# 7. 엔드포인트 삭제

In [25]:
# Clean up the cloud resources through the project's delete_endpoint helper.
# NOTE(review): per the recorded cell output the helper removes the model, the
# endpoint config and the endpoint — confirm in src/inference_utils.py.
client = boto3.Session().client('sagemaker')
delete_endpoint(client, real_endpoint_name)

--- Deleted model: triton-ncf-mdl-2022-12-11-07-57-19
--- Deleted endpoint_config: triton-ncf-epc-2022-12-11-07-57-19
--- Deleted endpoint: triton-ncf-ep-2022-12-11-07-57-19
