In [29]:
## This is a sample notebook which contains some of the code we used to apply auto-scaling on our model endpoints 

import sagemaker, boto3, json
from sagemaker import get_execution_role, session
from sagemaker.session import production_variant
from sagemaker import Predictor
from sagemaker import image_uris, model_uris, script_uris
from ast import literal_eval
from urllib.parse import urlparse
import os
import time
import numpy as np
import pandas as pd

# --- AWS sessions and region -------------------------------------------------
sess = boto3.Session()
aws_region = boto3.Session().region_name
region = boto3.Session().region_name

# --- Service clients ---------------------------------------------------------
sm = sess.client("sagemaker")
# Runtime client: used further down for direct invoke_endpoint calls.
sagemaker_client = boto3.client("sagemaker-runtime")
# Application Auto Scaling client: used by auto_scale_variant().
as_client = boto3.client("application-autoscaling")

# --- SageMaker SDK sessions, execution role and default bucket ---------------
role = get_execution_role()
sagemaker_session = sagemaker.Session(boto_session=sess)
sm_session = session.Session(boto3.Session())
bucket = sagemaker_session.default_bucket()

# --- Image model -------------------------------------------------------------
image_model_s3_uri = "s3://sagemaker-us-east-1-233328792017/project-image-model/model/image_model.tar.gz"
image_endpoint_name = "image-model-endpoint"  ## replace with actual endpoint name

# --- Text model --------------------------------------------------------------
text_model_id = "tensorflow-tc-bert-en-uncased-L-12-H-768-A-12-2"
text_model_version = "*"
text_model_s3_uri = "s3://sagemaker-us-east-1-233328792017/project-text-model/data/output/jumpstart-example-tensorflow-tc-bert-en-2023-06-18-07-56-14-807/output/model.tar.gz"
text_endpoint_name = "text-model-endpoint"  ## replace with actual endpoint name

def auto_scale_variant(endpoint_name, variant_name, min_capacity=1, max_capacity=2):
    """Enable auto-scaling for one production variant of a SageMaker endpoint.

    Registers the variant's instance count as a scalable target with
    Application Auto Scaling (via the module-level ``as_client``) and attaches
    two target-tracking policies to it:

    * ``Invocations-ScalingPolicy`` - tracks the predefined
      ``SageMakerVariantInvocationsPerInstance`` metric with a target of 10.0.
    * ``CPUUtil-ScalingPolicy`` - tracks the ``CPUUtilization`` metric from the
      ``/aws/sagemaker/Endpoints`` namespace with a target of 90.0 percent.

    Args:
        endpoint_name: Name of the endpoint that hosts the variant.
        variant_name: Name of the production variant to scale.
        min_capacity: Lower bound for the variant's instance count (default 1).
        max_capacity: Upper bound for the variant's instance count (default 2).

    Returns:
        None. The service responses are not used.
    """
    resource_id = 'endpoint/' + endpoint_name + '/variant/' + variant_name
    scalable_dimension = 'sagemaker:variant:DesiredInstanceCount'

    # The variant must be registered as a scalable target before any policy
    # can be attached to it.
    as_client.register_scalable_target(
        ServiceNamespace='sagemaker',
        ResourceId=resource_id,
        ScalableDimension=scalable_dimension,
        MinCapacity=min_capacity,
        MaxCapacity=max_capacity,
    )

    # Example 1 - SageMakerVariantInvocationsPerInstance metric
    as_client.put_scaling_policy(
        PolicyName='Invocations-ScalingPolicy',
        ServiceNamespace='sagemaker',
        ResourceId=resource_id,
        ScalableDimension=scalable_dimension,
        PolicyType='TargetTrackingScaling',
        TargetTrackingScalingPolicyConfiguration={
            'TargetValue': 10.0,
            'PredefinedMetricSpecification': {
                'PredefinedMetricType': 'SageMakerVariantInvocationsPerInstance',
            },
            'ScaleInCooldown': 600,
            'ScaleOutCooldown': 300,
        },
    )

    # Example 2 - CPUUtilization metric
    as_client.put_scaling_policy(
        PolicyName='CPUUtil-ScalingPolicy',
        ServiceNamespace='sagemaker',
        ResourceId=resource_id,
        ScalableDimension=scalable_dimension,
        PolicyType='TargetTrackingScaling',
        TargetTrackingScalingPolicyConfiguration={
            'TargetValue': 90.0,
            'CustomizedMetricSpecification': {
                'MetricName': 'CPUUtilization',
                'Namespace': '/aws/sagemaker/Endpoints',
                'Dimensions': [
                    {'Name': 'EndpointName', 'Value': endpoint_name},
                    # Track the CPU of the variant being scaled, not a fixed
                    # 'Variant1'; otherwise every other variant would scale on
                    # the wrong (or a non-existent) metric.
                    {'Name': 'VariantName', 'Value': variant_name},
                ],
                'Statistic': 'Average',
                'Unit': 'Percent',
            },
            'ScaleInCooldown': 600,
            'ScaleOutCooldown': 300,
        },
    )


In [30]:
# Resolve the container image for the built-in image-classification algorithm.
# NOTE(review): the recorded output of this cell reports that version "1.7-1" is
# ignored and that the SDK falls back to the only supported version, 1.
image_model_container_uri = sagemaker.image_uris.retrieve(
    framework="image-classification",
    region=region,
    version="1.7-1",
)

image_model_1_name = "image-variant-1"
image_model_2_name = "image-variant-2"

# Register one SageMaker model per variant; both use the same container image
# and the same model artifact.
for _model_name in (image_model_1_name, image_model_2_name):
    sm_session.create_model(
        name=_model_name,
        role=role,
        container_defs={
            "Image": image_model_container_uri,
            "ModelDataUrl": image_model_s3_uri,
        },
    )

# Two production variants with the same instance type, instance count and
# initial weight, each backed by its own model.
variant1 = production_variant(
    model_name=image_model_1_name,
    instance_type="ml.m5.xlarge",
    initial_instance_count=1,
    variant_name="Variant1",
    initial_weight=0.5,
)

variant2 = production_variant(
    model_name=image_model_2_name,
    instance_type="ml.m5.xlarge",
    initial_instance_count=1,
    variant_name="Variant2",
    initial_weight=0.5,
)


Defaulting to the only supported framework/algorithm version: 1. Ignoring framework/algorithm version: 1.7-1.
Using already existing model: image-variant-1
Using already existing model: image-variant-2


In [18]:
# Single-variant alternative, kept for reference only:
# image_endpoint = image_model.deploy(
#     initial_instance_count = 1,
#     instance_type = 'ml.m5.large',
#     endpoint_name = image_endpoint_name
# )

# Create one endpoint that serves both production variants.
image_endpoint = sm_session.endpoint_from_production_variants(
    name=image_endpoint_name,
    production_variants=[variant1, variant2],
)

# Enable auto-scaling on each variant of the new endpoint.
for _variant_name in ("Variant1", "Variant2"):
    auto_scale_variant(image_endpoint_name, _variant_name)

------!

In [25]:
# Look up the inference container image for the text model id/version.
text_model_container_uri = sagemaker.image_uris.retrieve(
    region=region,
    framework=None,
    model_id=text_model_id,
    model_version=text_model_version,
    image_scope="inference",
    instance_type="ml.m5.xlarge",
)

# Wrap the trained artifact and the container image in a SageMaker Model object.
text_model = sagemaker.model.Model(
    image_uri=text_model_container_uri,
    model_data=text_model_s3_uri,
    role=role,
)


In [28]:
# The deploy call is left commented out, so this cell does not create the
# endpoint itself; text_endpoint_name has to refer to an endpoint that exists.
# text_endpoint = text_model.deploy(
#     initial_instance_count = 1,
#     instance_type = 'ml.m5.xlarge',
#     entry_point = 'inference.py',
#     endpoint_name = text_endpoint_name
# )

# Enable auto-scaling on the variant named 'AllTraffic'.
auto_scale_variant(endpoint_name=text_endpoint_name, variant_name="AllTraffic")

In [11]:
# Predictor handles bound to the two endpoints by name.
image_predictor = Predictor(endpoint_name=image_endpoint_name)
text_predictor = Predictor(endpoint_name=text_endpoint_name)


In [None]:
# NOTE(review): Image and display are not referenced in the visible cells.
from PIL import Image
from IPython.display import display

file_name = "./uploaded_images/Signature.jpg"

# Load the image file as raw bytes to use as the request body.
with open(file_name, "rb") as f:
    image_payload = bytearray(f.read())

# Send the bytes through the Predictor, declaring the payload as an image.
image_prediction = image_predictor.predict(
    image_payload,
    initial_args={"ContentType": "application/x-image"},
)

# The response is decoded as UTF-8 text and parsed as a Python literal.
image_prediction_text = image_prediction.decode("utf-8")
print(literal_eval(image_prediction_text))


In [None]:
# Same image request, sent through the sagemaker-runtime client instead of the Predictor.
image_response = sagemaker_client.invoke_endpoint(
    EndpointName=image_endpoint_name,
    ContentType="application/x-image",
    Body=image_payload,
)

# Read the response body, decode it and parse it as a Python literal.
image_response_text = image_response["Body"].read().decode("utf-8")
print(literal_eval(image_response_text))


In [None]:
input_text = "digital door block"

# The request body is the UTF-8 encoded input string.
text_payload = input_text.encode("utf-8")

text_prediction = text_predictor.predict(
    text_payload,
    initial_args={"ContentType": "text/csv"},
)

# The response is JSON; print the first entry under its 'predictions' key.
text_prediction_json = json.loads(text_prediction)
print(text_prediction_json["predictions"][0])


In [None]:
# Same text request, sent through the sagemaker-runtime client instead of the Predictor.
text_response = sagemaker_client.invoke_endpoint(
    EndpointName=text_endpoint_name,
    ContentType="text/csv",
    Body=text_payload,
)

# Parse the JSON response body and print the first entry under 'predictions'.
text_response_json = json.loads(text_response["Body"].read())
print(text_response_json["predictions"][0])


In [None]:
# Tear down both endpoints once the experiments are finished.
# `image_endpoint` holds the value returned by endpoint_from_production_variants(),
# which is the endpoint name (a str) and has no delete_endpoint() method, and
# `text_endpoint` is only assigned in commented-out code. Deletion therefore goes
# through the Predictor objects that are bound to each endpoint name.
image_predictor.delete_endpoint()
text_predictor.delete_endpoint()