# Approve and Deploy the Model

The pipeline run created a Model Package version within the specified Model Package Group. Notably, the model was registered (that is, the Model Package was created) with an approval status of `PendingManualApproval`.

With SageMaker Pipelines, data scientists can register the model with an approval status of either `Approved` or `PendingManualApproval` as part of the CI/CD workflow.

We can also approve the model using the SageMaker Studio UI or programmatically as shown below.

![Pipeline](img/generative_ai_pipeline_rlhf_plus.png)

In [2]:
import psutil

# Inspect the memory available to this notebook instance.
notebook_memory = psutil.virtual_memory()
print(notebook_memory)

# The workshop expects at least 32 GB (decimal) of total memory; anything
# smaller prints a banner asking the reader to switch to m5.2xlarge.
if notebook_memory.total >= 32 * 1000 * 1000 * 1000:
    correct_instance_type = True
else:
    print(
        "\n".join(
            [
                '*******************************************',
                'YOU ARE NOT USING THE CORRECT INSTANCE TYPE',
                'PLEASE CHANGE INSTANCE TYPE TO  m5.2xlarge ',
                '*******************************************',
            ]
        )
    )

svmem(total=33229979648, available=9676947456, percent=70.9, used=23085957120, free=3399323648, active=24093073408, inactive=4962443264, buffers=0, cached=6744698880, shared=1466368, slab=317378560)


In [3]:
from botocore.exceptions import ClientError

import os
import sagemaker
import logging
import boto3
import sagemaker
import pandas as pd

# One SageMaker SDK session for the whole notebook; its default bucket and the
# boto3 region are kept as globals for later cells.
sess = sagemaker.Session()
bucket = sess.default_bucket()
region = boto3.Session().region_name

import botocore.config

# Add an extra user-agent token to requests made through the client below.
config = botocore.config.Config(user_agent_extra='dsoaws/2.0')

# Low-level SageMaker API client used by the remaining cells.
sm = boto3.Session().client(
    service_name="sagemaker",
    region_name=region,
    config=config,
)

In [4]:
# Restore `role` from IPython's %store storage; it is passed to
# `create_model` / `Model` further down as the execution role.
%store -r role

# List Pipeline Execution Steps


In [5]:
# Restore `pipeline_name` from IPython's %store storage.
%store -r pipeline_name

In [6]:
# Show which pipeline the following cells will inspect.
print(pipeline_name)

dialogue-summary-pipeline-1681687171


In [7]:
%%time

import time
from pprint import pprint

# Fetch the execution summaries for the pipeline and report the status of the
# first listed execution.
executions_response = sm.list_pipeline_executions(PipelineName=pipeline_name)["PipelineExecutionSummaries"]
pipeline_execution_status = executions_response[0]["PipelineExecutionStatus"]
print(pipeline_execution_status)

# Poll until the execution leaves the "Executing" state.
# The pause happens on every iteration (not only after a failed call) so the
# loop does not issue back-to-back API requests while the pipeline is running.
while pipeline_execution_status == "Executing":
    print("Please wait...")
    time.sleep(30)
    try:
        executions_response = sm.list_pipeline_executions(PipelineName=pipeline_name)["PipelineExecutionSummaries"]
        pipeline_execution_status = executions_response[0]["PipelineExecutionStatus"]
    except Exception as e:
        # Best-effort polling: report the problem and retry on the next pass.
        print("Could not refresh the pipeline status, retrying: {}".format(e))

pprint(executions_response)

Succeeded
[{'PipelineExecutionArn': 'arn:aws:sagemaker:us-east-1:371366150581:pipeline/dialogue-summary-pipeline-1681687171/execution/gv4ginwwqazx',
  'PipelineExecutionDisplayName': 'execution-1681687178039',
  'PipelineExecutionStatus': 'Succeeded',
  'StartTime': datetime.datetime(2023, 4, 16, 23, 19, 37, 900000, tzinfo=tzlocal())}]
CPU times: user 10.9 ms, sys: 1.17 ms, total: 12.1 ms
Wall time: 248 ms


In [8]:
# Re-read the status of the first listed execution from the response
# gathered by the polling cell and print it.
pipeline_execution_status = executions_response[0]["PipelineExecutionStatus"]
print(pipeline_execution_status)

Succeeded


In [9]:
# Keep the ARN of the first listed execution; it is used to look up the
# execution's steps.
pipeline_execution_arn = executions_response[0]["PipelineExecutionArn"]
print(pipeline_execution_arn)

arn:aws:sagemaker:us-east-1:371366150581:pipeline/dialogue-summary-pipeline-1681687171/execution/gv4ginwwqazx


In [10]:
from pprint import pprint

# Fetch the steps of the pipeline execution and pretty-print the raw response.
steps = sm.list_pipeline_execution_steps(PipelineExecutionArn=pipeline_execution_arn)

pprint(steps)

{'PipelineExecutionSteps': [{'AttemptCount': 0,
                             'EndTime': datetime.datetime(2023, 4, 16, 23, 40, 35, 337000, tzinfo=tzlocal()),
                             'Metadata': {'RegisterModel': {'Arn': 'arn:aws:sagemaker:us-east-1:371366150581:model-package/summarization-1681687175/1'}},
                             'StartTime': datetime.datetime(2023, 4, 16, 23, 40, 34, 483000, tzinfo=tzlocal()),
                             'StepName': 'Summarization-RegisterModel',
                             'StepStatus': 'Succeeded'},
                            {'AttemptCount': 0,
                             'EndTime': datetime.datetime(2023, 4, 16, 23, 40, 35, 846000, tzinfo=tzlocal()),
                             'Metadata': {'Model': {'Arn': 'arn:aws:sagemaker:us-east-1:371366150581:model/pipelines-gv4ginwwqazx-createmodel-sjibnovcpa'}},
                             'StartTime': datetime.datetime(2023, 4, 16, 23, 40, 34, 483000, tzinfo=tzlocal()),
                    

# View Registered Model

In [11]:
# Find the model-registration step and read the ARN of the model package it
# created. The for/else raises when no such step exists, so the cells below
# never run with `model_package_arn` undefined or left over from an earlier run.
for execution_step in steps["PipelineExecutionSteps"]:
    if execution_step["StepName"] == "Summarization-RegisterModel":
        model_package_arn = execution_step["Metadata"]["RegisterModel"]["Arn"]
        break
else:
    raise ValueError(
        'No "Summarization-RegisterModel" step found; steps present: {}'.format(
            [step["StepName"] for step in steps["PipelineExecutionSteps"]]
        )
    )
print(model_package_arn)

arn:aws:sagemaker:us-east-1:371366150581:model-package/summarization-1681687175/1


In [12]:
# Programmatically set the approval status of the registered model package
# to "Approved".
model_package_update_response = sm.update_model_package(
    ModelPackageArn=model_package_arn,
    ModelApprovalStatus="Approved",  # Other options are Rejected and PendingManualApproval
)

# View Created Model

In [13]:
# Find the model-creation step and read the ARN of the model it produced.
# The for/else raises when no such step exists, so `model_arn` is never
# undefined or left over from an earlier run.
for execution_step in steps["PipelineExecutionSteps"]:
    if execution_step["StepName"] == "CreateModel":
        model_arn = execution_step["Metadata"]["Model"]["Arn"]
        break
else:
    raise ValueError(
        'No "CreateModel" step found; steps present: {}'.format(
            [step["StepName"] for step in steps["PipelineExecutionSteps"]]
        )
    )
print(model_arn)

# The model name is the last "/"-separated segment of the ARN.
pipeline_model_name = model_arn.split("/")[-1]
print(pipeline_model_name)

arn:aws:sagemaker:us-east-1:371366150581:model/pipelines-gv4ginwwqazx-createmodel-sjibnovcpa
pipelines-gv4ginwwqazx-createmodel-sjibnovcpa


# Create Model Endpoint from Model Registry
More details here:  https://docs.aws.amazon.com/sagemaker/latest/dg/model-registry-deploy.html


In [14]:
import time

# Whole-second timestamp used to make the resource names below unique.
timestamp = int(time.time())

model_from_registry_name = f"model-from-registry-{timestamp}"
print(f"Model from registry name : {model_from_registry_name}")

# Container definition that points at the registered model package.
model_registry_package_container = {"ModelPackageName": model_package_arn}

Model from registry name : model-from-registry-1681688604


In [15]:
from pprint import pprint

# Create a SageMaker model from the model-package container definition and
# pretty-print the API response.
create_model_from_registry_response = sm.create_model(
    ModelName=model_from_registry_name,
    PrimaryContainer=model_registry_package_container,
    ExecutionRoleArn=role,
)
pprint(create_model_from_registry_response)

{'ModelArn': 'arn:aws:sagemaker:us-east-1:371366150581:model/model-from-registry-1681688604',
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '92',
                                      'content-type': 'application/x-amz-json-1.1',
                                      'date': 'Sun, 16 Apr 2023 23:43:24 GMT',
                                      'x-amzn-requestid': '5d871fee-caad-4b7a-9068-7076d7102ab1'},
                      'HTTPStatusCode': 200,
                      'RequestId': '5d871fee-caad-4b7a-9068-7076d7102ab1',
                      'RetryAttempts': 0}}


In [16]:
# Keep the ARN of the newly created model; the bare expression displays it.
model_from_registry_arn = create_model_from_registry_response["ModelArn"]
model_from_registry_arn

'arn:aws:sagemaker:us-east-1:371366150581:model/model-from-registry-1681688604'

#### Note: In this workshop, we are intentionally deploying our model to only 1 instance. The general recommendation is to deploy to 2 or more instances for automatic placement across two AZs for high availability.

In [17]:
endpoint_config_name = f"model-from-registry-epc-{timestamp}"
print(endpoint_config_name)

# Single-variant endpoint configuration: one ml.m5.4xlarge instance that
# receives all traffic.
# NOTE(review): the variant serves `pipeline_model_name` (the model created by
# the pipeline), not `model_from_registry_name` -- confirm which is intended.
create_endpoint_config_response = sm.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[
        {
            "VariantName": "AllTraffic",
            "ModelName": pipeline_model_name,
            "InstanceType": "ml.m5.4xlarge",
            "InitialInstanceCount": 1,
            "InitialVariantWeight": 1,
        }
    ],
)

model-from-registry-epc-1681688604


In [18]:
# Remove any `pipeline_endpoint_name` previously persisted with %store.
%store -d pipeline_endpoint_name

In [19]:
# Try to restore a `pipeline_endpoint_name` persisted with %store by an
# earlier run; IPython prints a notice when nothing is stored under that name.

%store -r pipeline_endpoint_name

no stored variable or alias pipeline_endpoint_name


In [20]:
# A fresh, timestamp-suffixed endpoint is created every time this cell runs;
# an endpoint name from an earlier run is not reused.
timestamp = int(time.time())
pipeline_endpoint_name = f"model-from-registry-ep-{timestamp}"
print(f"Created Pipeline EndpointName={pipeline_endpoint_name}")

# Start the endpoint from the endpoint configuration and show its ARN.
create_endpoint_response = sm.create_endpoint(
    EndpointConfigName=endpoint_config_name,
    EndpointName=pipeline_endpoint_name,
)
print(create_endpoint_response["EndpointArn"])
print(create_endpoint_response["EndpointArn"])

Created Pipeline EndpointName=model-from-registry-ep-1681688605
arn:aws:sagemaker:us-east-1:371366150581:endpoint/model-from-registry-ep-1681688605


In [21]:
# Persist `pipeline_endpoint_name` with %store so it can be restored later
# with `%store -r`.
%store pipeline_endpoint_name

Stored 'pipeline_endpoint_name' (str)


In [22]:
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Render a clickable link to this endpoint's page in the SageMaker console.
display(
    HTML(
        '<b>Review <a target="blank" href="https://console.aws.amazon.com/sagemaker/home?region={}#/endpoints/{}">SageMaker HTTPS Endpoint</a></b>'.format(
            region, pipeline_endpoint_name
        )
    )
)

## DEV

In [55]:
from sagemaker.model import Model
from sagemaker.predictor import Predictor
import time
import json

# Look up the PyTorch 1.13 inference image for an ml.m5.4xlarge instance in
# the current region.
inference_image_uri = sagemaker.image_uris.retrieve(
    framework="pytorch",
    version="1.13",
    image_scope="inference",
    instance_type='ml.m5.4xlarge',
    region=region,
)
print(inference_image_uri)

# Timestamp-suffixed name so repeated runs do not collide.
model_name = f"summary-{int(time.time())}"

# NOTE(review): `model_data` is a hardcoded S3 URI (specific bucket and
# training-job prefix) -- confirm it exists in the account running this cell.
model = Model(
    name=model_name,
    role=role,
    sagemaker_session=sess,
    image_uri=inference_image_uri,
    model_data="s3://sagemaker-us-east-1-371366150581/pipelines-h3ntw128n6fs-Train-8p4kfuQulg/output/model.tar.gz",
    source_dir='src',
    entry_point='inference.py',
)

763104351884.dkr.ecr.us-east-1.amazonaws.com/pytorch-inference:1.13-cpu-py39


In [56]:
# Derive the endpoint name from the model name and deploy the model to a
# single ml.m5.4xlarge instance.
endpoint_name = model_name.replace('summary', 'summary-endpoint')
model.deploy(initial_instance_count=1, endpoint_name=endpoint_name, instance_type='ml.m5.4xlarge')

----!

In [57]:
# Display the endpoint name.
endpoint_name

'summary-endpoint-1681680679'

In [66]:
import json
from sagemaker import Predictor

# Prompt text sent as-is to the endpoint.
zero_shot_prompt = """Summarize the following conversation.

#Person1#: What time is it, Tom?
#Person2#: Just a minute. It's ten to nine by my watch.
#Person1#: Is it? I had no idea it was so late. I must be off now.
#Person2#: What's the hurry?
#Person1#: I must catch the nine-thirty train.
#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.

Summary:"""

# Predictor bound to the endpoint by name, reusing the notebook's session.
predictor = Predictor(endpoint_name=endpoint_name, sagemaker_session=sess)

# Send the prompt with explicit content-type / accept headers.
response = predictor.predict(
    zero_shot_prompt,
    initial_args={
        "ContentType": "application/x-text",
        "Accept": "application/json",
    },
)

# The raw response is bytes: decode as UTF-8, then parse as JSON.
response_json = json.loads(response.decode('utf-8'))
print(response_json)

Tom is late.


In [67]:
# Tear down the endpoint used by `predictor`.
predictor.delete_endpoint()

# _Wait Until the Endpoint is Deployed_
_Note:  This will take a few minutes.  Please be patient._

In [23]:
%%time

# Block until boto3's `endpoint_in_service` waiter completes for the
# pipeline endpoint.
waiter = sm.get_waiter("endpoint_in_service")
waiter.wait(EndpointName=pipeline_endpoint_name)

CPU times: user 46.6 ms, sys: 11.7 ms, total: 58.3 ms
Wall time: 1min 30s


# _Wait Until the Endpoint ^^ Above ^^ is Deployed_