In [27]:
import os, sagemaker, subprocess, boto3
from datetime import datetime
from sagemaker import s3
from sagemaker import get_execution_role
from sagemaker.pytorch import PyTorchModel
from sagemaker.deserializers import JSONDeserializer

In [28]:
%%writefile code/requirements.txt
# Extra packages for the inference code kept in code/.
# NOTE(review): presumably installed by the serving container at start-up — confirm.
nvgpu
# Provides the cv2 module imported by inference.py.
opencv-python
torchvision
seaborn
# Provides the YOLO class imported by inference.py.
ultralytics
# NOTE(review): pinned to an exact version; the reason for the pin is not visible here.
omegaconf==2.3.0

Overwriting code/requirements.txt


In [29]:
%%writefile code/inference.py
import numpy as np
import torch, os, json, io, cv2, time
from ultralytics import YOLO

def model_fn(model_dir):
    """Load the YOLO weights named by the ``YOLOV8_MODEL`` environment variable.

    Args:
        model_dir: Directory the model archive was extracted into.

    Returns:
        The ``YOLO`` model built from the resolved weights file.

    Raises:
        KeyError: If ``YOLOV8_MODEL`` is not set in the environment.
    """
    print("Executing model_fn from inference.py ...")
    weights_name = os.environ['YOLOV8_MODEL']
    # The notebook archives the whole ``code/`` folder, so the weights normally
    # sit in ``<model_dir>/code``; the archive root is still checked first so
    # archives that keep the weights at the top level continue to work.
    candidates = [
        os.path.join(model_dir, weights_name),
        os.path.join(model_dir, 'code', weights_name),
    ]
    # Fall back to the first candidate when neither file exists, which keeps the
    # previous behaviour of handing that path straight to YOLO.
    weights_path = next((path for path in candidates if os.path.isfile(path)), candidates[0])
    return YOLO(weights_path)

def input_fn(request_body, request_content_type):
    """Decode the request payload into an image array.

    The body is read with ``np.load``, reinterpreted as a ``uint8`` buffer and
    decoded with ``cv2.imdecode``, i.e. it is expected to be a NumPy ``.npy``
    payload wrapping the bytes of an encoded image (e.g. a JPEG).

    Args:
        request_body: Raw bytes of the request.
        request_content_type: Content type of the request; any non-empty value
            is accepted.

    Returns:
        The decoded image as returned by ``cv2.imdecode`` (``flags=-1``).

    Raises:
        Exception: If no content type was supplied.
        ValueError: If the payload cannot be decoded into an image.
    """
    print("Executing input_fn from inference.py ...")
    if not request_content_type:
        # Formatted (not concatenated) so that a ``None`` content type produces
        # this error instead of a TypeError from ``str + None``.
        raise Exception(f"Unsupported content type: {request_content_type}")

    # SECURITY NOTE(review): allow_pickle=True lets np.load unpickle object
    # arrays taken from the request body, which can execute arbitrary code.
    # Switch to allow_pickle=False once it is confirmed that no client sends
    # pickled object arrays.
    jpg_original = np.load(io.BytesIO(request_body), allow_pickle=True)
    jpg_as_np = np.frombuffer(jpg_original, dtype=np.uint8)
    img = cv2.imdecode(jpg_as_np, flags=-1)
    if img is None:
        # imdecode signals undecodable data with None; fail here rather than
        # passing None on to the model.
        raise ValueError("Could not decode an image from the request body")
    return img
    
def predict_fn(input_data, model):
    """Run the model on the decoded input and return its raw result.

    The model is moved to CUDA when it is available and to the CPU otherwise;
    the forward call runs with gradient tracking disabled.
    """
    print("Executing predict_fn from inference.py ...")
    target_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(torch.device(target_name))
    with torch.no_grad():
        return model(input_data)
        
def output_fn(prediction_output, content_type):
    """Serialize prediction results to a JSON string.

    For each result, every available field among ``boxes``, ``masks``,
    ``keypoints`` and ``probs`` is converted to nested lists and stored under
    its own name. All results write into the same dictionary, so when several
    results are passed the values of the last one win.

    Args:
        prediction_output: Iterable of result objects returned by the model.
        content_type: Requested response content type (not used; the response
            is always JSON).

    Returns:
        A JSON document (``str``) containing the fields that were present.
    """
    print("Executing output_fn from inference.py ...")
    infer = {}
    for result in prediction_output:
        for field in ('boxes', 'masks', 'keypoints', 'probs'):
            value = getattr(result, field, None)
            if value is None:
                continue
            # Copy to host memory first: .numpy() cannot read tensors that
            # still live on a CUDA device (GPU endpoints).
            infer[field] = value.cpu().numpy().data.tolist()
    return json.dumps(infer)

Overwriting code/inference.py


In [30]:
!pip3 install ultralytics
from ultralytics import YOLO

## Choose a model:
model_name = 'yolov8m.pt'

YOLO(model_name)
os.system(f'mv {model_name} code/.')

bashCommand = "tar -cpzf  model.tar.gz code/"
process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE)
output, error = process.communicate()



Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m.pt to 'yolov8m.pt'...
100%|██████████| 49.7M/49.7M [00:00<00:00, 399MB/s]


In [31]:
# S3 bucket used as the default bucket of the SageMaker session, and the key
# prefix reserved for this endpoint's artifacts.
bucket = 'x-beagle'
prefix = "yolov8/final-endpoint"
print(f'Bucket: {bucket}')

# Session bound to the bucket above.
sess = sagemaker.Session(default_bucket=bucket)

Bucket: x-beagle


In [32]:
# boto3 and os are already imported in the notebook's first cell.

# S3 bucket that receives the archive; reuses the bucket chosen for the
# SageMaker session instead of repeating the literal.
bucket_name = bucket
# Object key the archive is stored under
file_name = 'model.tar.gz'
# Local archive to upload. Resolved against the working directory, which is
# where the tar step wrote it, rather than a hard-coded home-directory path.
local_file_path = os.path.abspath(file_name)

# Create the S3 client
s3_client = boto3.client('s3')

# Upload model.tar.gz to the S3 bucket
s3_client.upload_file(local_file_path, bucket_name, file_name)

print(f'{file_name} uploaded to S3 bucket {bucket_name}')


model.tar.gz uploaded to S3 bucket x-beagle


In [33]:
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.deserializers import JSONDeserializer
from sagemaker.pytorch import PyTorchModel

# S3 bucket name, key of the uploaded model archive, and the resulting S3 URI
bucket_name = 'x-beagle'
model_artifact = 'model.tar.gz'
model_data = f's3://{bucket_name}/{model_artifact}'

# Fetch the SageMaker execution role
role = get_execution_role()

# Create the SageMaker session and client
sagemaker_session = sagemaker.Session()
sagemaker_client = boto3.client('sagemaker')

# Environment variables passed to the model. YOLOV8_MODEL is read by model_fn
# in inference.py to locate the weights file.
# NOTE(review): TS_MAX_RESPONSE_SIZE presumably raises the serving stack's
# response size limit — confirm.
container_env = {
    'TS_MAX_RESPONSE_SIZE': '20000000',
    'YOLOV8_MODEL': model_name,
}

# Create the PyTorch model
model = PyTorchModel(
    name='xbeagle-model-pytorch',
    entry_point='inference.py',
    model_data=model_data,
    role=role,
    framework_version='1.12',
    py_version='py38',
    env=container_env,
    sagemaker_session=sagemaker_session,
)

In [34]:
# Build the endpoint name from a fixed prefix plus the current UTC timestamp
# (down to microseconds), so every deployment gets a distinct name.
deploy_stamp = datetime.utcnow().strftime('%Y-%m-%d-%H-%M-%S-%f')
ENDPOINT_NAME = 'x-beagle-yolov8-' + deploy_stamp

# Deploy the endpoint
deploy_options = {
    'initial_instance_count': 1,
    'instance_type': 'ml.m5.4xlarge',
    'deserializer': JSONDeserializer(),
    'endpoint_name': ENDPOINT_NAME,
}
predictor = model.deploy(**deploy_options)

print(f'Endpoint Name: {ENDPOINT_NAME}')

-------!Endpoint Name: x-beagle-yolov8-2024-05-24-13-22-32-582715


In [35]:
# Attach a predictor to an already-deployed endpoint without redeploying.
# ENDPOINT_NAME replaces the former "YYYY-MM-DD-..." placeholder, which does
# not name a real endpoint; assign a different name to ENDPOINT_NAME first to
# target another endpoint.
predictor_reuse = sagemaker.predictor.Predictor(
    endpoint_name=ENDPOINT_NAME,
    sagemaker_session=sagemaker.Session(),
    # input_fn in inference.py reads the body with np.load, so requests must be
    # NumPy-serialized; a CSV payload cannot be parsed by the endpoint.
    serializer=sagemaker.serializers.NumpySerializer(),
    # output_fn returns JSON text; parse it like the deploy cells do.
    deserializer=JSONDeserializer(),
)

In [11]:
INSTANCE_TYPE = 'ml.g4dn.4xlarge'
ENDPOINT_NAME = 'xbeagle-yolov8-' + str(datetime.utcnow().strftime('%Y-%m-%d-%H-%M-%S-%f'))
%store ENDPOINT_NAME
print(f'Endpoint Name: {ENDPOINT_NAME}')

predictor = model.deploy(initial_instance_count=1, 
                         instance_type=INSTANCE_TYPE,
                         deserializer=JSONDeserializer(),
                         endpoint_name=ENDPOINT_NAME)

Stored 'ENDPOINT_NAME' (str)
Endpoint Name: xbeagle-yolov8-2024-05-24-05-31-12-296466
---------!