In [36]:
import boto3

# Specify your AWS Region
aws_region='ap-northeast-2'

# Create a low-level SageMaker service client.
sagemaker_client = boto3.client('sagemaker', region_name=aws_region)

# Role to give SageMaker permission to access AWS services.
sagemaker_role= "arn:aws:sts::907729080149:assumed-role/SagemakerFullAccess/SageMaker"

In [37]:
#Create a variable w/ the model S3 URI
s3_bucket = 'x-beagle' # Provide the name of your S3 bucket
bucket_prefix='asyncronize'
model_s3_key = f"{bucket_prefix}/model.tar.gz"

#Specify S3 bucket w/ model
model_url = f"s3://{s3_bucket}/{model_s3_key}"

In [38]:
#PyTorch 1.12 버전을 사용(기본에 없음...)
container = '763104351884.dkr.ecr.ap-northeast-2.amazonaws.com/pytorch-inference:1.12.0-gpu-py38'

In [39]:
from sagemaker import get_execution_role

model_name = 'x-beagle-asyncronize'
sagemaker_role = get_execution_role()

create_model_response = sagemaker_client.create_model(
    ModelName = model_name,
    ExecutionRoleArn = sagemaker_role,
    PrimaryContainer = {
        'Image': container,
        'ModelDataUrl': model_url,
        'Environment': {
            'TS_MAX_REQUEST_SIZE': '100000000',
            'TS_MAX_RESPONSE_SIZE': '100000000',
            'TS_DEFAULT_RESPONSE_TIMEOUT': '1000'
        },
    })

In [40]:
import datetime
from time import gmtime, strftime

# Create an endpoint config name. Here we create one based on the date  
# so it we can search endpoints based on creation time.
endpoint_config_name = f"x-beagleEndpointConfig-{strftime('%Y-%m-%d-%H-%M-%S', gmtime())}"

# The name of the model that you want to host. This is the name that you specified when creating the model.
model_name='x-beagle-asyncronize'

create_endpoint_config_response = sagemaker_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name, # You will specify this name in a CreateEndpoint request.
    # List of ProductionVariant objects, one for each model that you want to host at this endpoint.
    ProductionVariants=[
        {
            "VariantName": "x-beagle", # The name of the production variant.
            "ModelName": model_name, 
            "InstanceType": "ml.g4dn.4xlarge", # Specify the compute instance type.
            "InitialInstanceCount": 1 # Number of instances to launch initially.
        }
    ],
    AsyncInferenceConfig={
        "OutputConfig": {
            "S3OutputPath": f"s3://{s3_bucket}/{bucket_prefix}/output"
        },
        "ClientConfig": {
            # (Optional) Specify the max number of inflight invocations per instance
            # If no value is provided, Amazon SageMaker will choose an optimal value for you
            "MaxConcurrentInvocationsPerInstance": 4
        }
    }
)

print(f"Created EndpointConfig: {create_endpoint_config_response['EndpointConfigArn']}")

Created EndpointConfig: arn:aws:sagemaker:ap-northeast-2:907729080149:endpoint-config/x-beagleEndpointConfig-2024-05-24-18-29-46


In [43]:
# The name of the endpoint.The name must be unique within an AWS Region in your AWS account.
endpoint_name = 'x-beagle-asyncronize-endpoint' 

# The name of the endpoint configuration associated with this endpoint.
endpoint_config_name='x-beagleEndpointConfig-2024-05-24-18-29-46'

create_endpoint_response = sagemaker_client.create_endpoint(
                                            EndpointName=endpoint_name, 
                                            EndpointConfigName=endpoint_config_name) 


In [23]:
# Create a low-level client representing Amazon SageMaker Runtime
sagemaker_runtime = boto3.client("sagemaker-runtime", region_name='ap-northeast-2')

# Specify the location of the input. Here, a single SVM sample
input_location = "s3://x-beagle/additional_data"

# The name of the endpoint. The name must be unique within an AWS Region in your AWS account. 
endpoint_name='x-beagle-asyncronize-endpoint'

# After you deploy a model into production using SageMaker hosting 
# services, your client applications use this API to get inferences 
# from the model hosted at the specified endpoint.
response = sagemaker_runtime.invoke_endpoint_async(
                            EndpointName=endpoint_name, 
                            InputLocation=input_location,
                            InvocationTimeoutSeconds=3600)


In [44]:
import boto3
import cv2
import random
import numpy as np
import matplotlib.pyplot as plt
import time

# Create a low-level client representing Amazon SageMaker Runtime
sagemaker_runtime = boto3.client("sagemaker-runtime", region_name='ap-northeast-2')

# Create a low-level client representing Amazon SageMaker
sagemaker_client = boto3.client("sagemaker", region_name='ap-northeast-2')

# Specify the location of the input. Here, a single SVM sample
input_location = "s3://x-beagle/additional_data/test.jpg"

# The name of the endpoint. The name must be unique within an AWS Region in your AWS account. 
endpoint_name='x-beagle-asyncronize-endpoint'

# After you deploy a model into production using SageMaker hosting 
# services, your client applications use this API to get inferences 
# from the model hosted at the specified endpoint.
response = sagemaker_runtime.invoke_endpoint_async(
                            EndpointName=endpoint_name, 
                            InputLocation=input_location,
                            InvocationTimeoutSeconds=3600)

# Wait for the async inference to complete
output_location = None
max_attempts = 10000  # Increase the maximum number of attempts
attempt = 0

while output_location is None and attempt < max_attempts:
    endpoint_description = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
    if 'OutputConfig' in endpoint_description['AsyncInferenceConfig']:
        outputs = endpoint_description['AsyncInferenceConfig']['OutputConfig']['S3OutputPath']
        output_location = f"{outputs}/{response['InferenceId']}.out"
    time.sleep(20)  # Wait for 5 seconds before checking again
    attempt += 1

if output_location is None:
    raise ValueError("Timed out waiting for inference result.")

# Download the inference result from S3
s3_client = boto3.client('s3')
bucket_name = output_location.split('/')[2]
object_key = '/'.join(output_location.split('/')[3:])

# Wait until the result file exists
waiter = s3_client.get_waiter('object_exists')
waiter.wait(Bucket=bucket_name, Key=object_key)

response = s3_client.get_object(Bucket=bucket_name, Key=object_key)
result = response['Body'].read().decode('utf-8')

# Process the inference result
orig_image = cv2.imread('baggage.jpg')
image_height, image_width, _ = orig_image.shape
model_height, model_width = 300, 300
x_ratio = image_width/model_width
y_ratio = image_height/model_height

if 'boxes' in result:
    for idx,(x1,y1,x2,y2,conf,lbl) in enumerate(result['boxes']):
        # Draw Bounding Boxes
        x1, x2 = int(x_ratio*x1), int(x_ratio*x2)
        y1, y2 = int(y_ratio*y1), int(y_ratio*y2)
        color = (random.randint(10,255), random.randint(10,255), random.randint(10,255))
        cv2.rectangle(orig_image, (x1,y1), (x2,y2), color, 4)
        cv2.putText(orig_image, f"Class: {int(lbl)}", (x1,y1-40), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2, cv2.LINE_AA)
        cv2.putText(orig_image, f"Conf: {int(conf*100)}", (x1,y1-10), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2, cv2.LINE_AA)
        if 'masks' in result:
            # Draw Masks
            mask = cv2.resize(np.asarray(result['masks'][idx]), dsize=(image_width, image_height), interpolation=cv2.INTER_CUBIC)
            for c in range(3):
                orig_image[:,:,c] = np.where(mask>0.5, orig_image[:,:,c]*(0.5)+0.5*color[c], orig_image[:,:,c])

if 'probs' in result:
    # Find Class
    lbl = result['probs'].index(max(result['probs']))
    color = (random.randint(10,255), random.randint(10,255), random.randint(10,255))
    cv2.putText(orig_image, f"Class: {int(lbl)}", (20,20), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2, cv2.LINE_AA)

plt.imshow(cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB))
plt.show()

WaiterError: Waiter ObjectExists failed: Max attempts exceeded. Previously accepted state: Matched expected HTTP status code: 404