# Deploy YOLO Detection Endpoint on SageMaker

This notebook creates a real-time YOLO inference endpoint that Lambda can call.

In [None]:
# Install dependencies
!pip install sagemaker boto3 ultralytics -q

In [None]:
import os
import shutil
import tarfile

import boto3
import sagemaker
from sagemaker.pytorch import PyTorchModel

# Configuration
ROLE = 'arn:aws:iam::405474549540:role/rt-eco-sagemaker-role'
BUCKET = 'economic-forecast-models'
REGION = 'us-east-2'

# Bind every AWS client to REGION instead of whatever region the local
# environment happens to default to.
boto_session = boto3.Session(region_name=REGION)
# Also make REGION the default for bare boto3.client(...) calls made later in
# the notebook.
boto3.setup_default_session(region_name=REGION)

session = sagemaker.Session(boto_session=boto_session)
s3 = boto_session.client('s3')

print(f"SageMaker Session: {session}")
# Report the region the session is actually using, not just the constant.
print(f"Region: {session.boto_region_name}")

## Step 1: Package the Model

In [None]:
# Build model.tar.gz: YOLO weights plus the inference code and its requirements.
PACKAGE_DIR = 'model_package'
os.makedirs(PACKAGE_DIR, exist_ok=True)

# Download YOLO model from S3 (or use pretrained)
try:
    s3.download_file(BUCKET, 'yolo/ports/best.pt', 'model_package/best.pt')
    print("Downloaded custom YOLO model")
except s3.exceptions.ClientError as err:
    # Only S3 service errors (missing key, access denied, ...) trigger the
    # fallback. Interrupts and credential or network failures still surface.
    print(f"Custom model not available from S3: {err}")
    # Imported lazily: only needed when falling back to pretrained weights.
    from ultralytics import YOLO
    model = YOLO('yolov8n.pt')
    model.save('model_package/best.pt')
    print("Using pretrained YOLOv8n model")

# Copy inference script and its requirements. shutil.copy raises if a source
# file is missing, so an incomplete package is never archived.
for filename in ('inference.py', 'requirements.txt'):
    shutil.copy(os.path.join('../aws/sagemaker', filename), PACKAGE_DIR)

# Create tar.gz with the package contents at the archive root
with tarfile.open('model.tar.gz', 'w:gz') as tar:
    tar.add(PACKAGE_DIR, arcname='.')

print("Model package created: model.tar.gz")

In [None]:
# Upload the packaged model to S3.
# The object key is defined once and reused for both the URI and the upload.
model_key = 'endpoints/yolo-detection/model.tar.gz'
model_s3_path = f's3://{BUCKET}/{model_key}'

s3.upload_file('model.tar.gz', BUCKET, model_key)
print(f"Uploaded model to: {model_s3_path}")

## Step 2: Create SageMaker Model

In [None]:
# Describe the uploaded artifact as a SageMaker PyTorch model.
# PyTorchModel is imported in the notebook's import cell.
pytorch_model = PyTorchModel(
    model_data=model_s3_path,
    role=ROLE,
    framework_version='2.0.0',
    py_version='py310',
    entry_point='inference.py',
    source_dir='model_package',
    # Reuse the session created in the configuration cell so the model and the
    # endpoint are created with the same AWS settings as the rest of the notebook.
    sagemaker_session=session,
)

print("PyTorch Model created")

## Step 3: Deploy Endpoint

In [None]:
# Deploy endpoint (takes 5-10 minutes)
ENDPOINT_NAME = 'yolo-detection-endpoint'
INSTANCE_TYPE = 'ml.m5.xlarge'
# Hourly price as quoted in this notebook; verify against current AWS pricing.
HOURLY_COST_USD = 0.23

predictor = pytorch_model.deploy(
    instance_type=INSTANCE_TYPE,
    initial_instance_count=1,
    endpoint_name=ENDPOINT_NAME,
)

print(f"\n✅ Endpoint deployed: {ENDPOINT_NAME}")
print(f"   Cost: ~${HOURLY_COST_USD:.2f}/hour while running")

## Step 4: Test the Endpoint

In [None]:
import json
import base64
from PIL import Image
import matplotlib.pyplot as plt

# Test inputs, named so they are easy to swap for another image.
TEST_IMAGE_BUCKET = 'economic-forecast-raw'
TEST_IMAGE_KEY = 'satellite/google_earth/Port_of_LA/2024/2024-2.jpg'
TEST_IMAGE_PATH = 'test_image.jpg'
MAX_DETECTIONS_SHOWN = 10

# Download a test image
s3.download_file(TEST_IMAGE_BUCKET, TEST_IMAGE_KEY, TEST_IMAGE_PATH)

# Read image
with open(TEST_IMAGE_PATH, 'rb') as f:
    image_bytes = f.read()

# Call endpoint with the raw image bytes
runtime = boto3.client('sagemaker-runtime')

response = runtime.invoke_endpoint(
    EndpointName=ENDPOINT_NAME,
    ContentType='application/x-image',
    Body=image_bytes
)

# The response body is parsed as JSON. The keys read below ('total_count',
# 'detections', 'class', 'confidence') are whatever inference.py returns.
result = json.loads(response['Body'].read().decode())

print("\n🔍 Detection Results:")
print(f"   Total detections: {result['total_count']}")
for det in result['detections'][:MAX_DETECTIONS_SHOWN]:
    print(f"   • {det['class']}: {det['confidence']:.2f}")

In [None]:
# Display annotated image
import io

# 'annotated_image' is base64-decoded and opened as an image from memory.
annotated_bytes = base64.b64decode(result['annotated_image'])
annotated_img = Image.open(io.BytesIO(annotated_bytes))

# Explicit figure/axes interface instead of the pyplot state machine.
fig, ax = plt.subplots(figsize=(15, 10))
ax.imshow(annotated_img)
ax.set_title(f"YOLO Detection: {result['total_count']} objects detected")
ax.axis('off')
plt.show()

## Step 5: Delete Endpoint (When Done)

**IMPORTANT: The endpoint accrues charges for as long as it exists. Delete it with the cell below as soon as you are done.**

In [None]:
# DELETE ENDPOINT TO STOP CHARGES
# Set DELETE_ENDPOINT = True and run this cell when done with the demo.
# It defaults to False so "Run All" never tears down a freshly deployed endpoint.
DELETE_ENDPOINT = False

if DELETE_ENDPOINT:
    predictor.delete_endpoint()
    print("Endpoint deleted - no more charges!")
else:
    print(f"Endpoint '{ENDPOINT_NAME}' was NOT deleted and may still be accruing charges.")

## Endpoint Info for Lambda

Reference the endpoint by this name when calling `invoke_endpoint` from your Lambda function:
```python
ENDPOINT_NAME = 'yolo-detection-endpoint'
```