## Deploy inference models to AWS SageMaker

Define model parameters and environment.

In [None]:
# Image naming: the local Docker image and the ECR repository share one name.
IMAGE_NAME = "sagemaker_inference_server"
ECR_REPO_NAME = IMAGE_NAME

# Deployment target.
AWS_REGION = "eu-west-1"
OBJECT_DETECTION_MODEL = "dfine_x_obj2coco"  # key into the obj_models table
GPU = True  # picks the GPU instance type when true, the CPU one otherwise


In [None]:
# Checkpoint and config file paths for each supported D-FINE variant, keyed by
# model name. Each row below is (model name, checkpoint path, config path).
obj_models = {
    variant: {"CHECKPOINT_FILE": checkpoint_path, "CONFIG_FILE": config_path}
    for variant, checkpoint_path, config_path in (
        (
            "dfine_s_obj2coco",
            "/workspace/dfine_checkpoints/dfine_s_obj2coco.pth",
            "/workspace/D-FINE/configs/dfine/objects365/dfine_hgnetv2_s_obj2coco.yml",
        ),
        (
            "dfine_l_coco",
            "/workspace/dfine_checkpoints/dfine_l_coco.pth",
            "/workspace/D-FINE/configs/dfine/dfine_hgnetv2_l_coco.yml",
        ),
        (
            "dfine_l_obj2coco",
            "/workspace/dfine_checkpoints/dfine_l_obj2coco_e25.pth",
            "/workspace/D-FINE/configs/dfine/objects365/dfine_hgnetv2_l_obj2coco.yml",
        ),
        (
            "dfine_x_obj2coco",
            "/workspace/dfine_checkpoints/dfine_x_obj2coco.pth",
            "/workspace/D-FINE/configs/dfine/objects365/dfine_hgnetv2_x_obj2coco.yml",
        ),
    )
}

# The GPU flag is the single switch for both the instance type and the device
# string, so the two cannot drift apart if the CPU instance family is changed
# later (a prefix check on "ml.m5" would label any other CPU type as CUDA).
CURRENT_INSTANCE_TYPE = "ml.g4dn.xlarge" if GPU else "ml.m5.xlarge"
device = "cuda:0" if GPU else "cpu"


1. Build the Docker image and publish it to AWS ECR

In [None]:
# Build the Docker image from the current directory (the build context) and
# tag it with IMAGE_NAME.
!docker build -t {IMAGE_NAME} .

In [None]:
import boto3

# Resolve the AWS account behind the active credentials; it is part of the
# ECR registry host name.
account_id = boto3.client("sts").get_caller_identity()["Account"]

ecr_uri = f"{account_id}.dkr.ecr.{AWS_REGION}.amazonaws.com/{ECR_REPO_NAME}"
print(f"ECR URL: {ecr_uri}")

# Create the repository, tolerating only the "already exists" case. Any other
# failure (credentials, permissions, region) is raised instead of being
# reported as an existing repository.
ecr_client = boto3.client("ecr", region_name=AWS_REGION)
try:
    ecr_client.create_repository(repositoryName=ECR_REPO_NAME)
except ecr_client.exceptions.RepositoryAlreadyExistsException:
    print("Repo already exists")

In [None]:
# Tag the local image with the ECR repository URI (no explicit tag is given,
# so Docker applies "latest") and push it.
# NOTE(review): the push needs Docker to be authenticated against the ECR
# registry; no login step is visible in this notebook — confirm it is done
# beforehand.
!docker tag {IMAGE_NAME}:latest {ecr_uri}
!docker push {ecr_uri}

In [None]:
# load test will use same image as inference server
ecr_uri = ecr_uri.replace("_loadtest", "")

# Name handed to the deploy pipeline: IMAGE_NAME with underscores turned into hyphens.
model_name = IMAGE_NAME.replace("_", "-")

# NOTE(review): the account id in this ARN is hard-coded, while the ECR URI is
# built from the caller's account id — confirm the two are meant to match.
execution_role_arn = "arn:aws:iam::354918369325:role/AmazonSageMaker-ExecutionRole"

# Environment for the deployment: the selected model's file paths plus the device.
env_vars = {**obj_models[OBJECT_DETECTION_MODEL], "DEVICE": device}

# Put the parent directory on the import path before importing aws_utils
# (presumably where that module lives — confirm).
import sys
sys.path.append("..")
from aws_utils import SageMakerClient

c = SageMakerClient(region_name=AWS_REGION)
c.sagemaker_inference_deploy_pipeline(
    model_name,
    ecr_uri,
    execution_role_arn,
    env_vars,
    CURRENT_INSTANCE_TYPE,
)

In [None]:
from test_inference_server import get_random_coco_images, get_plate_images, get_local_images
import json, boto3
import cv2, base64

# Runtime client used by make_aws_sagemaker_request to invoke the endpoint.
sagemaker_runtime = boto3.client('sagemaker-runtime', region_name=AWS_REGION)
def make_aws_sagemaker_request(image_input, model_type):
    """Send one image to the SageMaker endpoint and return its parsed JSON reply.

    Args:
        image_input: Either a string starting with ``http``, which is forwarded
            unchanged under the ``image_url`` key, or a local file path, which
            is read with OpenCV, re-encoded as PNG and sent base64-encoded
            under the ``image_base64`` key.
        model_type: Value placed in the payload's ``model`` field.

    Returns:
        The endpoint's response body decoded from JSON. An empty dict is
        returned (after printing a message) when the local image cannot be
        read or cannot be encoded.
    """
    if image_input.startswith("http"):
        payload = {
            "image_url": image_input,
            "model": model_type
        }
    else:
        img = cv2.imread(image_input)
        if img is None:
            print(f"Failed to read image {image_input}")
            return {}
        # imencode reports failure through its first return value; do not send
        # a payload built from an unusable buffer.
        encoded_ok, img_encoded = cv2.imencode('.png', img)
        if not encoded_ok:
            print(f"Failed to encode image {image_input}")
            return {}
        image_b64 = base64.b64encode(img_encoded).decode('utf-8')
        payload = {
            "image_base64": image_b64,
            "model": model_type
        }

    # Endpoint name is derived from IMAGE_NAME; presumably this matches the
    # name the deploy pipeline assigns — confirm against aws_utils.
    response = sagemaker_runtime.invoke_endpoint(
        EndpointName=f"{IMAGE_NAME.replace('_', '-')}-endpoint",
        ContentType="application/json",
        Body=json.dumps(payload),
    )
    return json.loads(response["Body"].read().decode())

def run_inference_batch(images, model_type):
    """Invoke the endpoint once per image, printing and collecting each reply.

    Args:
        images: Iterable of image URLs or local paths accepted by
            make_aws_sagemaker_request.
        model_type: Model identifier sent with every request.

    Returns:
        List of the parsed replies, in the same order as ``images``.
    """
    results = []
    for image in images:
        print(f"Processing {image}")
        result = make_aws_sagemaker_request(image, model_type)
        print(result)
        results.append(result)
    return results


# Each image set is fetched right before it is processed, as separate steps.
alpr_images = get_plate_images()
alpr_results = run_inference_batch(alpr_images, "license_plate_recognition")

coco_images = get_random_coco_images(5)
coco_results = run_inference_batch(coco_images, "object_detection")

local_images = get_local_images()
local_results = run_inference_batch(local_images, "object_detection_and_ppe")
