## 1.1 Import Python Libraries

In [26]:
import os, sagemaker, subprocess, boto3
from datetime import datetime
from sagemaker import s3
from sagemaker import get_execution_role
from sagemaker.pytorch import PyTorchModel
from sagemaker.deserializers import JSONDeserializer

## 1.2 Install Ultralytics for YOLOv8 model

The following models are available to choose from:
1. Object Detection:
    - yolov8l.pt
    - yolov8m.pt
    - yolov8n.pt
    - yolov8s.pt
    - yolov8x.pt
    - yolov8x6.pt
2. Segmentation:
    - yolov8l-seg.pt
    - yolov8m-seg.pt
    - yolov8n-seg.pt
    - yolov8s-seg.pt
    - yolov8x-seg.pt
3. Classification:
    - yolov8l-cls.pt
    - yolov8m-cls.pt
    - yolov8n-cls.pt
    - yolov8s-cls.pt
    - yolov8x-cls.pt
4. Pose:
    - yolov8n-pose.pt
    - yolov8s-pose.pt
    - yolov8m-pose.pt
    - yolov8l-pose.pt
    - yolov8x-pose.pt
    - yolov8x-pose-p6.pt

For further information, check the link: https://docs.ultralytics.com

In [27]:
# Uncomment to install the dependency into the kernel's environment:
# %pip install ultralytics
from ultralytics import YOLO

## Choose a model:
model_name = 'yolov8x.pt'

# Instantiating the model fetches the weights file into the working directory
# when it is not already available locally.
YOLO(model_name)

# Move the weights next to the inference code so both end up in the archive.
# os.replace raises if the weights file or the code/ directory is missing,
# instead of silently producing an archive without the model.
os.replace(model_name, os.path.join('code', model_name))

# Package the code/ directory (inference code + weights) as model.tar.gz.
# check=True surfaces a non-zero tar exit status as CalledProcessError.
bashCommand = ["tar", "-cpzf", "model.tar.gz", "code/"]
process = subprocess.run(bashCommand, stdout=subprocess.PIPE, check=True)
output, error = process.stdout, process.stderr

Downloading https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8x.pt to 'yolov8x.pt'...


100%|██████████| 131M/131M [00:03<00:00, 39.8MB/s] 


## 1.3 Zip the code and model into `model.tar.gz` and upload it to the designated S3 bucket
Permission is granted only to the S3 bucket created with CDK, not to any other bucket.

In [28]:
s3_client = boto3.client('s3')
response = s3_client.list_buckets()

# Select the first bucket whose name contains 'yolov8'. A separate name is used
# for the match so that `bucket` is only ever bound to the final 's3://' URI.
bucket_name = next(
    (entry["Name"] for entry in response['Buckets'] if 'yolov8' in entry["Name"]),
    None,
)
if bucket_name is None:
    # Fail early with a clear message rather than a confusing error further down.
    raise RuntimeError("No S3 bucket with 'yolov8' in its name is visible to this account.")

bucket = 's3://' + bucket_name
print(f'Bucket: {bucket}')

# The SageMaker session expects the bare bucket name (no 's3://' scheme).
sess = sagemaker.Session(default_bucket=bucket_name)

# Key prefix under which the model archive is uploaded.
prefix = "yolov8/model"

Bucket: s3://dev-sagemaker-yolov8


In [29]:
sm_client = boto3.client(service_name="sagemaker")
runtime_sm_client = boto3.client(service_name="sagemaker-runtime")

# Account and region of the credentials this notebook runs with.
account_id = boto3.client("sts").get_caller_identity()["Account"]
region = boto3.Session().region_name

# Build the role ARN from the caller's account instead of hard-coding an
# account ID, so the notebook works in whichever account it is run from.
role = f"arn:aws:iam::{account_id}:role/dev-sagemaker-role"
print(f'Role: {role}')

# Upload the packaged archive to <bucket>/<prefix>/ and keep the returned S3 URI.
model_data = s3.S3Uploader.upload("model.tar.gz", f"{bucket}/{prefix}")
print(f'Bucket: {bucket}')
print(f'Model Data: {model_data}')

Role: arn:aws:iam::637423538863:role/dev-sagemaker-role
Bucket: s3://dev-sagemaker-yolov8
Model Data: s3://dev-sagemaker-yolov8/yolov8/model/model.tar.gz


In [30]:
# Name given to the SageMaker endpoint created by the deploy cell.
ENDPOINT_NAME = "yolov8-pytorch-endpoint"

# Instance type used for hosting. The compile cell only runs when this value
# contains "inf". Alternatives kept for reference:
#   "ml.m5.2xlarge"
#   'ml.inf2.xlarge'
INSTANCE_TYPE = "ml.g5.xlarge"

# Optional accelerator (disabled):
# ACCELERATOR_TYPE = 'ml.eia2.xlarge'

## 1.4 Create the SageMaker PyTorchModel

In [31]:
# Describe the model to SageMaker: the uploaded archive, the entry-point
# script, the execution role, and the PyTorch container version to use.
model = PyTorchModel(
    model_data=model_data,
    entry_point='inference.py',
    role=role,
    sagemaker_session=sess,
    framework_version='1.12',
    py_version='py38',
    env={
        # Presumably raises the model server's response size limit - confirm.
        'TS_MAX_RESPONSE_SIZE': '20000000',
        # Weights file name; presumably read by inference.py - confirm.
        'YOLOV8_MODEL': model_name,
    },
)

## 1.4.1 Compile the SageMaker PyTorchModel (for Inferentia instances only)

In [32]:
from sagemaker.utils import name_from_base
import json

# Compilation only runs when an Inferentia ("inf") instance type is selected.
if "inf" in INSTANCE_TYPE:
    # Use a dedicated name here: the global `bucket` holds the 's3://'-prefixed
    # URI used by the upload cell and must not be overwritten with a bare name.
    compile_bucket = sess.default_bucket()
    # NOTE(review): the job-name base says "Inf1" while the target below is ml_inf2.
    compilation_job_name = name_from_base("Yolov8-Inf1")
    compiled_model_path = f"s3://{compile_bucket}/yolov8/output"
    print(f"Compiled model path: {compiled_model_path}")
    model.compile(
        target_instance_family="ml_inf2",
        input_shape={"input0": [1, 3, 640, 640]},
        output_path=compiled_model_path,
        framework_version="1.12",
        role=role,
        job_name=compilation_job_name,
        compiler_options=json.dumps("--dtype int64"),
    )

## 1.5 Deploy the model to a SageMaker endpoint

In [33]:
# Store the endpoint name in the history to be accessed by 2_TestEndpoint.ipynb notebook
%store ENDPOINT_NAME
print(f'Endpoint Name: {ENDPOINT_NAME}')

predictor = model.deploy(initial_instance_count=1, 
                         instance_type=INSTANCE_TYPE,
                         #accelerator_type=ACCELERATOR_TYPE,
                         deserializer=JSONDeserializer(),
                         endpoint_name=ENDPOINT_NAME)

Stored 'ENDPOINT_NAME' (str)
Endpoint Name: yolov8-pytorch-endpoint


--------!