In [1]:
import os
import time
import json
import requests
import tempfile
import numpy as np
import pandas as pd

import boto3
import sagemaker
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.steps import TrainingStep, CreateModelStep, ProcessingStep

from sagemaker.estimator import Estimator
from sagemaker import get_execution_role

from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.pipeline_context import PipelineSession
from sagemaker.workflow.parameters import ParameterString
from sagemaker.model import Model
from sagemaker.pipeline import PipelineModel
from sagemaker.workflow.model_step import ModelStep
from sagemaker.sklearn.processing import SKLearnProcessor

In [2]:
# region = boto3.Session().region_name
# boto3.setup_default_session(region_name=region)
# boto_session = boto3.Session(region_name=region)

# s3_client = boto3.client("s3", region_name=region)

# sagemaker_boto_client = boto_session.client("sagemaker")
# sagemaker_session = sagemaker.session.Session(
#     boto_session=boto_session, sagemaker_client=sagemaker_boto_client
# )

# print(region, boto_session, s3_client, sagemaker_boto_client, sagemaker_session)

In [3]:
# Default SageMaker session; its boto session supplies the region and the STS client.
sagemaker_session = sagemaker.session.Session()
region = sagemaker_session.boto_session.region_name
account = (
    sagemaker_session.boto_session
    .client("sts")
    .get_caller_identity()["Account"]
)

# Execution role passed to the estimator, models, processor and pipeline upsert.
role = get_execution_role()

# Low-level S3 and SageMaker clients pinned to the session's region.
s3_client, sm_client = (
    boto3.client(service, region_name=region) for service in ("s3", "sagemaker")
)

# print(sagemaker_session, account, region, role)

In [4]:
# S3 bucket, naming prefix, and the object key of the training CSV inside `bucket`.
bucket = "ml-training-data-model"
prefix = "mch-artwork-content"
training_input_prefix = "content_ovr_anonymised.csv"

# Pipeline parameter: approval status applied when the model is registered.
model_approval_status = ParameterString(name="ModelApprovalStatus", default_value="Approved")


In [5]:
# Full S3 URI of the training data; handed to the training step as its input.
training_input = "s3://{}/{}".format(bucket, training_input_prefix)

In [6]:
# Container image passed to the estimator.
# NOTE(review): account id and region are hard-coded here although `account` and
# `region` are computed earlier — confirm whether the image should follow them.
image_uri = "791574662255.dkr.ecr.us-east-1.amazonaws.com/artwork-content-model:latest"

# Training output location: the root of `bucket`.
model_path = "s3://" + bucket

In [7]:

# Generic estimator that runs the custom container on one ml.m5.xlarge instance
# and writes its output under `model_path`.
content = Estimator(
    role=role,
    image_uri=image_uri,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    output_path=model_path,
)


In [8]:
# Pipeline step that trains the `content` estimator on the data at `training_input`.
step_train = TrainingStep(name="training", estimator=content, inputs=training_input)

In [9]:
mdl_name = "artwork_content_model"

# Model built from the training image; `model_data` is a property reference to the
# training step's artifacts rather than a concrete S3 path.
model = Model(
    name=mdl_name,
    role=role,
    image_uri=content.training_image_uri(),
    model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
    sagemaker_session=PipelineSession(),
)

In [10]:
# Pipeline session handed to the PipelineModel below.
pipeline_session = PipelineSession()

# Single-model inference pipeline wrapping `model`.
pipeline_model = PipelineModel(models=[model], role=role, sagemaker_session=pipeline_session)

In [11]:
# timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")
# mpg_name = f"artwork-content-{timestamp}"
mpg_name = "MCH-Content-Models"

# Step arguments for registering `pipeline_model` in the `mpg_name` model package
# group; consumed by the registration ModelStep.
register_model_step_args = pipeline_model.register(
    model_package_group_name=mpg_name,
    approval_status=model_approval_status,
    content_types=["application/json"],
    response_types=["application/json"],
    inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
)



In [12]:
# Query the model packages in the `mpg_name` group; as the cell's last expression
# the response is displayed as the cell output.
sm_client.list_model_packages(ModelPackageGroupName=mpg_name)

{'ModelPackageSummaryList': [{'ModelPackageGroupName': 'MCH-Content-Models',
   'ModelPackageVersion': 12,
   'ModelPackageArn': 'arn:aws:sagemaker:us-east-1:791574662255:model-package/mch-content-models/12',
   'CreationTime': datetime.datetime(2023, 1, 4, 11, 14, 0, 836000, tzinfo=tzlocal()),
   'ModelPackageStatus': 'Completed',
   'ModelApprovalStatus': 'Approved'},
  {'ModelPackageGroupName': 'MCH-Content-Models',
   'ModelPackageVersion': 11,
   'ModelPackageArn': 'arn:aws:sagemaker:us-east-1:791574662255:model-package/mch-content-models/11',
   'CreationTime': datetime.datetime(2023, 1, 4, 8, 51, 4, 819000, tzinfo=tzlocal()),
   'ModelPackageStatus': 'Completed',
   'ModelApprovalStatus': 'Approved'},
  {'ModelPackageGroupName': 'MCH-Content-Models',
   'ModelPackageVersion': 10,
   'ModelPackageArn': 'arn:aws:sagemaker:us-east-1:791574662255:model-package/mch-content-models/10',
   'CreationTime': datetime.datetime(2023, 1, 4, 5, 30, 53, 445000, tzinfo=tzlocal()),
   'ModelPack

In [13]:
model_name = "artwork-content-model"

# Pipeline step that carries out the registration prepared in `register_model_step_args`.
step_model_registration = ModelStep(name=model_name, step_args=register_model_step_args)

In [14]:
# Pipeline step that creates the SageMaker model from `model`.
step_model_create = ModelStep(
    name=model_name + "-creation",
    step_args=model.create(instance_type="ml.m5.xlarge"),
)

In [15]:
# Display the ModelName property of the model-creation step; the cell output shows
# a Properties object rather than a concrete string.
step_model_create.properties.ModelName

<sagemaker.workflow.properties.Properties at 0x7f69eff69250>

In [16]:
# Stage the local deployment script in S3 so the processing step can reference it by URI.
s3_client.upload_file(
    Filename="deploy.py",
    Bucket=bucket,
    Key="code/deploy.py",
)
deploy_script_uri = "s3://" + bucket + "/code/deploy.py"

# scikit-learn processor (framework 1.0-1, one ml.t3.medium instance) used by the
# deployment processing step.
deployment_processor = SKLearnProcessor(
    role=role,
    framework_version="1.0-1",
    instance_count=1,
    instance_type="ml.t3.medium",
    sagemaker_session=sagemaker_session,
    base_job_name=prefix + "-deploy",
)

# Processing step that runs the uploaded deploy script. Arguments are laid out as
# flag/value pairs; the model name is taken from `step_model_create`'s properties.
# NOTE(review): deploy.py is not shown here — confirm it accepts exactly these flags.
deploy_step = ProcessingStep(
    name="DeployModel",
    code=deploy_script_uri,
    processor=deployment_processor,
    job_arguments=[
        "--model-name", step_model_create.properties.ModelName,
        "--region", region,
        "--endpoint-instance-type", "ml.m4.xlarge",
        "--endpoint-name", "mch-artwork-content-endpoint-1",
    ],
)

In [17]:
pipeline_name = "artwork-content-pipeline"

# All steps of the pipeline: training, model creation, registration and deployment.
pipeline_steps = [step_train, step_model_create, step_model_registration, deploy_step]

# Assemble the pipeline; `model_approval_status` is its only parameter.
pipeline = Pipeline(
    name=pipeline_name,
    steps=pipeline_steps,
    parameters=[model_approval_status],
)

In [18]:
# json.loads(pipeline.definition())

In [19]:
# Create or update the pipeline definition using `role`; the service response is
# displayed as the cell output.
pipeline.upsert(role_arn=role)

Popping out 'CertifyForMarketplace' from the pipeline definition since it will be overridden in pipeline execution time.


{'PipelineArn': 'arn:aws:sagemaker:us-east-1:791574662255:pipeline/artwork-content-pipeline',
 'ResponseMetadata': {'RequestId': '18f09a83-ec4f-4154-8f3f-696965f47e78',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '18f09a83-ec4f-4154-8f3f-696965f47e78',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '92',
   'date': 'Thu, 05 Jan 2023 05:53:54 GMT'},
  'RetryAttempts': 0}}

In [20]:
# Start a pipeline execution and keep the handle for the inspection cells that follow.
execution = pipeline.start()
# Bare expression so the execution handle's repr is shown as the cell output.
execution

_PipelineExecution(arn='arn:aws:sagemaker:us-east-1:791574662255:pipeline/artwork-content-pipeline/execution/tcvyht35077h', sagemaker_session=<sagemaker.session.Session object at 0x7f69eff7a8d0>)

In [21]:
# execution.describe()

In [22]:
# execution.wait()

In [23]:
# execution.list_steps()

In [24]:
# `time` is already imported in the notebook's first cell, so only the lineage
# helper is imported here.
from sagemaker.lineage.visualizer import LineageTableVisualizer

# Lineage is only recorded for steps that have actually run, so block until the
# execution started above has finished before querying it. Without this, a fresh
# "Run All" reaches this cell immediately after `pipeline.start()`.
execution.wait()

# Reuse the notebook's existing SageMaker session instead of constructing another.
viz = LineageTableVisualizer(sagemaker_session)

# Walk the steps in the reverse of the order `list_steps()` returns them
# (presumably oldest first — confirm against the API's ordering).
for execution_step in reversed(execution.list_steps()):
    print(execution_step)
    display(viz.show(pipeline_execution_step=execution_step))
    # Short pause between lineage queries (presumably to avoid API throttling).
    time.sleep(5)

In [25]:
# s3_client.upload_file(Filename="deploy.py", Bucket=bucket, Key="code/deploy.py")
# deploy_script_uri = f"s3://{bucket}/code/deploy.py"

# deployment_processor = SKLearnProcessor(
#     framework_version="1.0-1",
#     role=role,
#     instance_type="ml.t3.medium",
#     instance_count=1,
#     base_job_name=f"{prefix}-deploy",
#     sagemaker_session=sagemaker_session,
# )

# deploy_step = ProcessingStep(
#     name="DeployModel",
#     processor=deployment_processor,
#     job_arguments=[
#         "--model-name",
#         step_model_registration.name,
#         "--region",
#         region,
#         "--endpoint-instance-type",
#         "ml.m4.xlarge",
#         "--endpoint-name",
#         "mch-artwork-content-endpoint",
#     ],
#     code=deploy_script_uri,
# )

In [26]:
# sklearn_processor = SKLearnProcessor(
#     framework_version="1.0-1",
#     instance_type=processing_instance_type,
#     instance_count=processing_instance_count,
#     base_job_name="comprehend-process",
#     sagemaker_session=sagemaker_session,
#     role=role_arn,
# )

In [27]:
# from sklearn.preprocessing import FunctionTransformer
# step_deploy_model = ProcessingStep(
#     name="ContentDeploy",
#     processor=sklearn_processor,
#     job_arguments=[
#         "--arn-path",
#         step_model_registration.properties.ProcessingOutputConfig.Outputs["arn"].S3Output.S3Uri,
#     ],
#     code="deploy.py",
#     outputs=[
#         ProcessingOutput(output_name="endpoint_arn", source="/opt/ml/processing/endpoint_arn")
#     ],
# )