# Deploy the latest approved model as a real-time endpoint


---

This notebook's CI test result for us-west-2 is as follows. CI test results in other regions can be found at the end of the notebook. 

![This us-west-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-west-2/sagemaker-pipeline-parameterization|parameterized-pipeline.ipynb)

---

### This is part 3 of the solution:

1. Utilizes the latest approved model to create a real time endpoint

2. Runs inferences for testing the real time deployed endpoint

In [None]:
# Upgrade the SageMaker Python SDK (quietly) before it is imported in the next cell.
!pip install -U sagemaker --quiet # Ensure correct version of SageMaker is installed

In [None]:
## Import the boto3 and sagemaker libraries needed to initialize the session
import json
import time
import boto3
import sagemaker
from utils import *
import sagemaker.session
from typing import Dict, List, Optional, Tuple, Union
from sagemaker.workflow.pipeline_context import PipelineSession

In [None]:
## set the logger to track all of the logs as this pipeline runs
# Import `logging` explicitly: no `import logging` is visible in the import cell,
# so this cell presumably only worked because the name arrived through
# `from utils import *` -- TODO confirm. Importing it here makes the cell
# self-contained either way.
import logging

# Root configuration: timestamp, process id, source location, level, message.
logging.basicConfig(
    format='[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Logger shared by the rest of the notebook cells.
logger = logging.getLogger(__name__)

### Load the `config.yml` file, which contains the settings used across this pipeline

In [None]:
# Load config.yml through the load_config helper and log it as indented JSON
# so the settings used for this run are visible in the notebook output.
config = load_config('config.yml')
_config_dump = json.dumps(config, indent=2)
logger.info(_config_dump)

In [None]:
## Create the SageMaker session objects, then read the execution role from the
## config and resolve the session's default S3 bucket.
session = sagemaker.session.Session()
pipeline_session = PipelineSession()

role = config['aws']['sagemaker_execution_role']
session_bucket = session.default_bucket()

# Record which bucket and role this run is using.
for _message in (
    f"the pipeline bucket being used for this pipeline execution -> {session_bucket}",
    f"the sagemaker execution role being used across this pipeline -> {role}",
):
    logger.info(_message)

In [None]:
import os
import tarfile
import tempfile

## represents the source path of the inference file
inference_dir_path = config['dir_scripts']['batch_inference']

# Normalize before taking the basename: with a trailing separator
# (e.g. "scripts/inference/") os.path.basename returns "", which would
# produce an archive called ".tar.gz" whose top-level member has an empty name.
archive_member_name = os.path.basename(os.path.normpath(inference_dir_path))

# Define the name of the output .tar.gz file and stage it in the platform's
# temporary directory instead of a hard-coded "/tmp/".
output_filename = archive_member_name + '.tar.gz'
output_filepath = os.path.join(tempfile.gettempdir(), output_filename)

# Compress the directory or file; inside the archive it is stored under its own
# base name rather than its full source path.
with tarfile.open(output_filepath, "w:gz") as tar:
    tar.add(inference_dir_path, arcname=archive_member_name)

print(f"Archive created at {output_filepath}")

## upload the compressed inference file into s3 to have it be used during inference and deploy the model
# NOTE(review): `prefix` is not assigned in any visible cell of this notebook;
# presumably it is provided by `from utils import *` -- confirm, otherwise this
# line raises NameError.
compressed_inference_script_uri = session.upload_data(
    path=output_filepath,
    key_prefix=prefix + "/inference/mlops",
)

logger.info(f"Compressed inference script uploaded to: {compressed_inference_script_uri}")

### Next, get the latest approved model from the registry and deploy it as a real-time endpoint
---
The deployment below uses the most recently approved model package, so make sure a model in the model package group has been approved before launching the deployment process.

In [None]:
## scikit-learn framework version of the SageMaker container to look up
FRAMEWORK_VERSION = "0.23-1"

# Resolve the scikit-learn container image URI for the configured region.
# This image is passed as the container image when the model is created
# further down in the notebook.
_image_lookup_args = dict(
    framework="sklearn",
    region=config['aws']['region'],
    version=FRAMEWORK_VERSION,
    py_version="py3",
    instance_type=config['input_params']['processing_instance_type'],
)
image_uri = sagemaker.image_uris.retrieve(**_image_lookup_args)

In [None]:
sm = boto3.client("sagemaker")

model_package_group_name = config['general']['model_group']

# Ask the registry for approved packages only, newest first. Filtering with
# ModelApprovalStatus on the service side avoids paging through every pending
# or rejected package just to discard it client-side.
approved_package_pages = sm.get_paginator('list_model_packages').paginate(
    ModelPackageGroupName=model_package_group_name,
    ModelApprovalStatus="Approved",
    SortBy="CreationTime",
    SortOrder="Descending",
)

# The paginator is lazy, so taking the first summary stops further page
# requests; None is the result when the group has no approved package.
latest_approved_model_package_arn = next(
    (
        package_summary["ModelPackageArn"]
        for page in approved_package_pages
        for package_summary in page["ModelPackageSummaryList"]
    ),
    None,
)

# Fail fast here rather than with a confusing error in describe_model_package.
if latest_approved_model_package_arn is None:
    raise RuntimeError(
        f"No approved model package is found for {model_package_group_name} model package group"
    )

# Print the latest approved model package ARN
print(f"Latest approved model package ARN: {latest_approved_model_package_arn}")


In [None]:
# Fetch the full description of the approved model package selected above.
latest_approved_model_package = sm.describe_model_package(
    ModelPackageName=latest_approved_model_package_arn
)

## getting the model data for the latest, approved model:
## the artifact location is read from the first container of the inference specification
_inference_containers = latest_approved_model_package['InferenceSpecification']['Containers']
model_data_url = _inference_containers[0]['ModelDataUrl']

logger.info(f"the model data url for the given approved model is -> {model_data_url}")

In [None]:
from time import gmtime, strftime

# Model name = configured base name followed by the current UTC timestamp.
model_name = config['general']['model_name'] + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print(f"Model name : {model_name}")

# Environment variables handed to the container: the entry-point script name
# and the S3 location of the compressed inference code uploaded earlier.
_container_environment = {
    'SAGEMAKER_PROGRAM': 'inference.py',
    'SAGEMAKER_SUBMIT_DIRECTORY': compressed_inference_script_uri,
}

# Single-container definition: serving image + approved model artifact + environment.
container_list = [
    {
        'Image': image_uri,
        'ModelDataUrl': model_data_url,
        'Environment': _container_environment,
    }
]

## register the model with SageMaker so an endpoint config can reference it by name
create_model_response = sm.create_model(
    ModelName=model_name,
    ExecutionRoleArn=role,
    Containers=container_list,
)

print(f"Model arn : {create_model_response['ModelArn']}")
print(f"Model data url : {model_data_url}")
print(f"Model image uri : {image_uri}")

#### Creating the endpoint config

In [None]:
# Endpoint config name = configured base name followed by the current UTC timestamp.
endpoint_config_name = config['general']['endpoint_config_name'] + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
instance_type = "ml.m5.xlarge"

print(endpoint_config_name)

## change your managed instance configuration here
_managed_instance_scaling = {
    "MaxInstanceCount": 3,
    "MinInstanceCount": 1,
    "Status": "ENABLED",
}

# One production variant that receives all traffic and serves the model created above.
_all_traffic_variant = {
    'InstanceType': instance_type,
    ## starting instance count; the upper bound is MaxInstanceCount in the scaling settings
    'InitialInstanceCount': 1,
    'InitialVariantWeight': 1,
    'ModelName': model_name,
    'VariantName': 'AllTraffic',
    "ManagedInstanceScaling": _managed_instance_scaling,
}

create_endpoint_config_response = sm.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[_all_traffic_variant],
)

print(create_endpoint_config_response["EndpointConfigArn"])


### Run the cell below only if you want to update an existing endpoint to use the endpoint config created above

In [None]:
## Optional: switch an existing endpoint over to the endpoint config created above.
## Before uncommenting, make sure `endpoint_name` holds the name of an endpoint that
## already exists (in this notebook it is only assigned in the deployment cell further down).

# response = sm.update_endpoint(
#     EndpointName=endpoint_name,
#     EndpointConfigName=endpoint_config_name
# )

# print(response)

## Finally, deploy the model as a real-time endpoint

In [None]:
# Endpoint name = configured base name followed by the current UTC timestamp.
endpoint_name = config['general']['endpoint_name'] + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print("EndpointName={}".format(endpoint_name))

create_endpoint_response = sm.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name)
print(create_endpoint_response['EndpointArn'])

# wait for endpoint to leave the "Creating" state using describe endpoint
describe_endpoint_response = sm.describe_endpoint(EndpointName=endpoint_name)

while describe_endpoint_response["EndpointStatus"] == "Creating":
    # Sleep *before* re-checking so no extra 15s is spent after the endpoint
    # has already left the "Creating" state.
    time.sleep(15)
    describe_endpoint_response = sm.describe_endpoint(EndpointName=endpoint_name)
    print(describe_endpoint_response["EndpointStatus"])

# Surface a failed deployment here, with the service-provided reason, instead of
# letting the invocation cell fail later with a less helpful error.
if describe_endpoint_response["EndpointStatus"] != "InService":
    raise RuntimeError(
        "Endpoint {} ended in status {}: {}".format(
            endpoint_name,
            describe_endpoint_response["EndpointStatus"],
            describe_endpoint_response.get("FailureReason", "no failure reason reported"),
        )
    )

# Last expression of the cell: display the final endpoint description.
describe_endpoint_response

In [None]:
# Runtime client used to send inference requests to the deployed endpoint.
smr = boto3.client('sagemaker-runtime')

## create this from the config param.
## Sample CSV payload: a header row followed by two records.
body_str = "total_extended_price,avg_discount,total_quantity\n1,2,3\n66.77,12,2"

_request_payload = body_str.encode('utf-8')
response = smr.invoke_endpoint(
    EndpointName=endpoint_name,
    Body=_request_payload,
    ContentType='text/csv',
)

# Read the streamed response body and decode it to text for display.
_raw_prediction = response["Body"].read()
response_str = _raw_prediction.decode()
response_str

## Notebook CI Test Results

This notebook was tested in multiple regions. The test results are as follows, except for us-west-2 which is shown at the top of the notebook.

![This us-east-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-east-1/sagemaker-pipeline-parameterization|parameterized-pipeline.ipynb)

![This us-east-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-east-2/sagemaker-pipeline-parameterization|parameterized-pipeline.ipynb)

![This us-west-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-west-1/sagemaker-pipeline-parameterization|parameterized-pipeline.ipynb)

![This ca-central-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ca-central-1/sagemaker-pipeline-parameterization|parameterized-pipeline.ipynb)

![This sa-east-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/sa-east-1/sagemaker-pipeline-parameterization|parameterized-pipeline.ipynb)

![This eu-west-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-west-1/sagemaker-pipeline-parameterization|parameterized-pipeline.ipynb)

![This eu-west-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-west-2/sagemaker-pipeline-parameterization|parameterized-pipeline.ipynb)

![This eu-west-3 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-west-3/sagemaker-pipeline-parameterization|parameterized-pipeline.ipynb)

![This eu-central-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-central-1/sagemaker-pipeline-parameterization|parameterized-pipeline.ipynb)

![This eu-north-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-north-1/sagemaker-pipeline-parameterization|parameterized-pipeline.ipynb)

![This ap-southeast-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-southeast-1/sagemaker-pipeline-parameterization|parameterized-pipeline.ipynb)

![This ap-southeast-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-southeast-2/sagemaker-pipeline-parameterization|parameterized-pipeline.ipynb)

![This ap-northeast-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-northeast-1/sagemaker-pipeline-parameterization|parameterized-pipeline.ipynb)

![This ap-northeast-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-northeast-2/sagemaker-pipeline-parameterization|parameterized-pipeline.ipynb)

![This ap-south-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-south-1/sagemaker-pipeline-parameterization|parameterized-pipeline.ipynb)
