In [25]:
import boto3
from sagemaker import Session
from sagemaker import get_execution_role

client = boto3.client(service_name="sagemaker")
runtime = boto3.client(service_name="sagemaker-runtime")

In [20]:
role = get_execution_role()

model_artifacts = 's3://eliezerraj-908671954593-dataset/payment/output/xgboost-240402-2155-012-1711fe34/output/model.tar.gz'
container = '825641698319.dkr.ecr.us-east-2.amazonaws.com/xgboost:latest'
model_artifacts

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


's3://eliezerraj-908671954593-dataset/payment/output/xgboost-240402-2155-012-1711fe34/output/model.tar.gz'

In [21]:
from time import gmtime, strftime

model_name = "xgboost-serverless-fraud-v2-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print("Model name: " + model_name)

# dummy environment variables
byo_container_env_vars = {"SAGEMAKER_CONTAINER_LOG_LEVEL": "20", "SOME_ENV_VAR": "myEnvVar"}

create_model_response = client.create_model(
    ModelName=model_name,
    Containers=[
        {
            "Image": container,
            "Mode": "SingleModel",
            "ModelDataUrl": model_artifacts,
            "Environment": byo_container_env_vars,
        }
    ],
    ExecutionRoleArn=role,
)

print("Model Arn: " + create_model_response["ModelArn"])

Model name: xgboost-serverless-fraud-v2-2024-04-02-23-27-08
Model Arn: arn:aws:sagemaker:us-east-2:908671954593:model/xgboost-serverless-fraud-v2-2024-04-02-23-27-08


In [22]:
xgboost_epc_name = "mlops-serverless-epc-fraud-v2-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())

endpoint_config_response = client.create_endpoint_config(
    EndpointConfigName=xgboost_epc_name,
    ProductionVariants=[
        {
            "VariantName": "byoVariant",
            "ModelName": model_name,
            "ServerlessConfig": {
                "MemorySizeInMB": 4096,
                "MaxConcurrency": 1,
            },
        },
    ],
)

print("Endpoint Configuration Arn: " + endpoint_config_response["EndpointConfigArn"])

Endpoint Configuration Arn: arn:aws:sagemaker:us-east-2:908671954593:endpoint-config/mlops-serverless-epc-fraud-v2-2024-04-02-23-27-10


In [23]:
endpoint_name = "xgboost-serverless-ep-fraud-v2" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())

create_endpoint_response = client.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=xgboost_epc_name,
)

print("Endpoint Arn: " + create_endpoint_response["EndpointArn"])

Endpoint Arn: arn:aws:sagemaker:us-east-2:908671954593:endpoint/xgboost-serverless-ep-fraud-v22024-04-02-23-27-14


In [24]:
# wait for endpoint to reach a terminal state (InService) using describe endpoint
import time

describe_endpoint_response = client.describe_endpoint(EndpointName=endpoint_name)

while describe_endpoint_response["EndpointStatus"] == "Creating":
    describe_endpoint_response = client.describe_endpoint(EndpointName=endpoint_name)
    print(describe_endpoint_response["EndpointStatus"])
    time.sleep(15)

describe_endpoint_response

Creating
Creating
Creating
Creating
Creating
Creating
Creating
InService


{'EndpointName': 'xgboost-serverless-ep-fraud-v22024-04-02-23-27-14',
 'EndpointArn': 'arn:aws:sagemaker:us-east-2:908671954593:endpoint/xgboost-serverless-ep-fraud-v22024-04-02-23-27-14',
 'EndpointConfigName': 'mlops-serverless-epc-fraud-v2-2024-04-02-23-27-10',
 'ProductionVariants': [{'VariantName': 'byoVariant',
   'DeployedImages': [{'SpecifiedImage': '825641698319.dkr.ecr.us-east-2.amazonaws.com/xgboost:latest',
     'ResolvedImage': '825641698319.dkr.ecr.us-east-2.amazonaws.com/xgboost@sha256:0c8f830ac408e6dee08445fb60392e9c3f05f790a4b3c07ec22327c08f75bcbf',
     'ResolutionTime': datetime.datetime(2024, 4, 2, 23, 27, 14, 910000, tzinfo=tzlocal())}],
   'CurrentWeight': 1.0,
   'DesiredWeight': 1.0,
   'CurrentInstanceCount': 0,
   'CurrentServerlessConfig': {'MemorySizeInMB': 4096, 'MaxConcurrency': 1}}],
 'EndpointStatus': 'InService',
 'CreationTime': datetime.datetime(2024, 4, 2, 23, 27, 14, 459000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2024, 4, 2, 23, 2

In [27]:
import numpy as np
# Endpoint invocation
endpoint_name="xgboost-serverless-ep-fraud-v22024-04-02-23-27-14"
payload = b"100., 0., 1., 1., 400., 365.0, 17., 263.529412, 28., 238.714286, 97582.0"

response = runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    Body=payload,
    ContentType="text/csv",
)

y_predict = response["Body"].read().decode()
print(y_predict)
print(np.round(float(y_predict)))

0.5235762596130371
1.0


In [None]:
client.delete_model(ModelName=model_name)
client.delete_endpoint_config(EndpointConfigName=xgboost_epc_name)
client.delete_endpoint(EndpointName=endpoint_name)