In [None]:
from sagify.config.config import ConfigManager
import os

# Both values come from the environment and are None when the variable is unset.
role, bucket_uri = (os.environ.get(key) for key in ("ROLE_ARN", "S3_BUCKET_URI"))

# Settings object produced by sagify's ConfigManager from the relative path
# ".sagify.json"; later cells read aws_profile, aws_region and image_name from it.
config = ConfigManager(".sagify.json").get_config()

# Create artifact

In [None]:
from pathlib import Path
import tarfile

# Directory whose top-level entries become the archive contents.
source_path = Path("tmp/model")
# Gzip-compressed tarball that the upload step reads from tmp/artifact.
target_path = Path("tmp/artifact/model.tar.gz")

# tarfile.open() does not create missing directories, so make sure the
# destination folder exists before opening the archive for writing.
target_path.parent.mkdir(parents=True, exist_ok=True)

with tarfile.open(target_path, "w:gz") as archive:
    # Sort the entries so the member order does not depend on filesystem
    # enumeration order.
    for path in sorted(source_path.glob("*")):
        # Store each entry relative to source_path so it sits at the archive
        # root instead of under tmp/model/.
        archive.add(path, arcname=path.relative_to(source_path).as_posix())

# Upload model to bucket

In [None]:
from sagify.sagemaker.sagemaker import SageMakerClient


# Local folder holding the packaged model and the destination prefix for it.
input_dir = "tmp/artifact"
s3_dir = "inference/mle-on-aws-book"
# NOTE(review): the deploy cell loads the model from
# f"{bucket_uri}/inference/mle-on-aws-book/model.tar.gz" -- confirm that
# upload_data resolves this relative prefix to that same bucket.

# sagify client built from the profile/region in the sagify config and the
# role taken from the ROLE_ARN environment variable.
sage_maker_client = SageMakerClient(config.aws_profile, config.aws_region, role)
sage_maker_client.upload_data(input_dir, s3_dir)

# Push image

In [None]:
# IPython shell escape: runs the `sagify push` CLI command in a subshell.
# Presumably this publishes the image that the deploy cell later references as
# "<account>.dkr.ecr.<region>.amazonaws.com/<config.image_name>:latest" -- confirm.
!sagify push

# Deploy serverless endpoint

In [None]:
from sagemaker.model import Model
from sagemaker.serverless.serverless_inference_config import ServerlessInferenceConfig
import boto3
import sagemaker


def construct_image_location(image_name: str, boto_session: boto3.Session) -> str:
    """Return the ECR repository location for ``image_name``.

    Args:
        image_name: Repository name appended after the registry host.
        boto_session: Session used to look up the caller's account id through
            STS; its ``region_name`` selects the registry region.

    Returns:
        A string of the form
        ``<account>.dkr.ecr.<region>.amazonaws.com/<image_name>`` with no tag.
    """
    caller_identity = boto_session.client("sts").get_caller_identity()
    account_id = caller_identity["Account"]
    registry_host = f"{account_id}.dkr.ecr.{boto_session.region_name}.amazonaws.com"
    return f"{registry_host}/{image_name}"


# Fail fast: bucket_uri comes from the S3_BUCKET_URI environment variable, and
# an unset value would otherwise be formatted into the literal "None/inference/...".
if not bucket_uri:
    raise ValueError(
        "S3_BUCKET_URI is not set; it is required to locate model.tar.gz in S3."
    )

# Drop any trailing slash so the joined URI never contains "//".
model_data = f"{bucket_uri.rstrip('/')}/inference/mle-on-aws-book/model.tar.gz"

# Serverless endpoint settings: 1024 MB of memory, at most one concurrent invocation.
serverless_config = ServerlessInferenceConfig(memory_size_in_mb=1024, max_concurrency=1)

# Session bound to the region and profile named in the sagify config.
boto_session = boto3.Session(
    region_name=config.aws_region, profile_name=config.aws_profile
)

# Deploy the ":latest" tag of the project image.
image_uri = f"{construct_image_location(config.image_name, boto_session)}:latest"
print(image_uri)

sagemaker_session = sagemaker.Session(boto_session=boto_session)
model = Model(
    image_uri=image_uri,
    model_data=model_data,
    name="mle-on-aws-book",
    sagemaker_session=sagemaker_session,
    role=role,
)

# The inference cell addresses the endpoint by this same name.
endpoint_name = "mle-on-aws-book"
model.deploy(serverless_inference_config=serverless_config, endpoint_name=endpoint_name)

# Inference

In [None]:
from random import random
from sagemaker import Session
from sagemaker.predictor import Predictor
from sagemaker.deserializers import JSONDeserializer
from sagemaker.serializers import JSONSerializer

# Imported here so this cell does not rely on the deploy cell having been run
# in the current kernel.
import boto3

# Name under which the deploy cell created the endpoint.
endpoint = "mle-on-aws-book"

# Use the same region and profile as the deploy cell; a hard-coded region would
# miss the endpoint whenever the sagify config points somewhere else.
boto_session = boto3.Session(
    region_name=config.aws_region, profile_name=config.aws_profile
)
sagemaker_session = Session(boto_session)

# JSON in, JSON out: request payloads are serialized as JSON and responses are
# decoded from JSON.
predictor = Predictor(
    endpoint,
    sagemaker_session=sagemaker_session,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
)

# Send ten requests, each with a random "x" in [0, 1), and print every response.
for _ in range(10):
    print(predictor.predict({"x": random()}))

# Local inference

In [None]:
from random import random
import requests

# Named `sample_x` rather than `input` so the built-in input() is not shadowed
# for the rest of the notebook.
sample_x = random()

# A timeout keeps the cell from hanging forever when nothing is listening
# on localhost:8080.
response = requests.post(
    "http://localhost:8080/invocations", json={"x": sample_x}, timeout=30
)
print(response)

# Stop on a non-2xx reply instead of reporting an error body as a prediction.
response.raise_for_status()

# Decode the body once and reuse it.
prediction = response.json()
print(f"Input: {sample_x}, Prediction: {prediction}")

# Load test

In [None]:
from multiprocessing.pool import ThreadPool

from tqdm.auto import tqdm


def invoke(input, timeout=30.0):
    """POST one sample to the local inference server.

    Args:
        input: Value sent as the ``"x"`` field of the JSON request body.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error, so a stalled request cannot block a worker forever.

    Returns:
        The HTTP status code of the response, letting callers tally failures.
    """
    response = requests.post(
        "http://localhost:8080/invocations", json={"x": input}, timeout=timeout
    )
    return response.status_code


# Total number of requests to fire at the local server.
n = 1000
# One random payload value per request, generated up front.
load_inputs = [random() for _ in range(n)]

# Ten worker threads issue the requests; results arrive in completion order
# and each one advances the progress bar by a single step.
with ThreadPool(10) as pool, tqdm(total=n) as pbar:
    for _ in pool.imap_unordered(invoke, load_inputs):
        pbar.update(1)