In [1]:
import os
import time
import json
import requests
import tempfile
import numpy as np
import pandas as pd

import boto3
import sagemaker
from sagemaker.inputs import TrainingInput, CreateModelInput
from sagemaker.workflow.steps import TrainingStep, CreateModelStep, ProcessingStep, TransformStep

from sagemaker.estimator import Estimator
from sagemaker import get_execution_role

from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.pipeline_context import PipelineSession
from sagemaker.workflow.parameters import ParameterString
from sagemaker.model import Model
from sagemaker.pipeline import PipelineModel
from sagemaker.workflow.model_step import ModelStep
from sagemaker.workflow.step_collections import RegisterModel
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.processing import ProcessingOutput, ProcessingInput

from datetime import datetime

In [2]:
# Resolve the AWS region. AWS_REGION takes precedence; when it is not set we
# fall back to whatever boto3 resolves from its own configuration chain
# instead of failing with a bare KeyError.
region = os.environ.get("AWS_REGION") or boto3.Session().region_name
if not region:
    raise RuntimeError(
        "Could not determine the AWS region: set the AWS_REGION environment "
        "variable or configure a default region for boto3."
    )

# Single boto3 session shared by every client created in this notebook.
boto_session = boto3.Session(region_name=region)

# Build the service clients from the shared session so they are guaranteed to
# use the same region and credentials as the SageMaker session below.
s3_client = boto_session.client("s3")
sm_client = boto_session.client("sagemaker")

sagemaker_session = sagemaker.Session(
    boto_session=boto_session, sagemaker_client=sm_client
)

# IAM role that is passed to the estimator, model, processor and pipeline.
sm_role = get_execution_role(sagemaker_session=sagemaker_session)

In [3]:
# Uncomment to inspect the resolved execution role ARN:
# sm_role

In [4]:
# S3 locations and naming prefixes shared by the cells below.
bucket = "artwork-content-trial-bucket"  # bucket holding data, code and pipeline output
prefix = "mch-artwork-content"  # prefix for the deployment job's base name
train_data_dir_prefix = "data"  # key prefix of the training data inside the bucket
pipeline_dir_prefix = "pipeline-data"  # key prefix for the estimator output
training_input_prefix = "content_ovr_anonymised.csv"  # object name of the training CSV

# Pipeline parameter "ModelApprovalStatus"; it is handed to the model
# registration step and defaults to "Approved" when not overridden.
model_approval_status = ParameterString(
    default_value="Approved",
    name="ModelApprovalStatus",
)


In [5]:
# S3 URI of the training CSV:
#   s3://<bucket>/<train_data_dir_prefix>/<training_input_prefix>
training_input = "s3://" + "/".join(
    (bucket, train_data_dir_prefix, training_input_prefix)
)


# input_train = ParameterString(
#     name="TrainData",
#     default_value=training_input,
# )

# model_output = ParameterString(name="ModelOutput", default_value=f"s3://{bucket}/model")

In [6]:
# Training container image in ECR. Note that the account id and the region
# (us-east-1) are fixed inside this URI.
train_image_uri = "791574662255.dkr.ecr.us-east-1.amazonaws.com/artwork-content-repo:latest"

# S3 location handed to the estimator as its output path (trailing slash kept).
estimator_output_path = "s3://{}/{}/".format(bucket, pipeline_dir_prefix)

In [7]:
# Estimator that runs the custom training image on a single ml.m5.xlarge
# instance and writes its output under `estimator_output_path`.
content_estimator = Estimator(
    image_uri=train_image_uri,
    instance_type="ml.m5.xlarge",
    instance_count=1,
    output_path=estimator_output_path,
    role=sm_role,
    # Reuse the explicitly configured session, exactly as the Model and the
    # SKLearnProcessor in this notebook do. Without it the estimator builds
    # its own default session, which may resolve a different region than the
    # one selected at the top of the notebook.
    sagemaker_session=sagemaker_session,
)

In [8]:
# Pipeline step that runs `content_estimator` against the training CSV.
# `training_input` is passed as a plain S3 URI string.
step_train = TrainingStep(
    inputs=training_input,
    estimator=content_estimator,
    name="TrainingStep",
)

In [9]:
mdl_name = "artwork-content-model"

# The model is served from the same image the estimator trains with, and its
# artifact is the S3 model output of the training step (resolved when the
# pipeline runs, which also makes this step depend on `step_train`).
model = Model(
    role=sm_role,
    sagemaker_session=sagemaker_session,
    model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
    image_uri=content_estimator.training_image_uri(),
    name=mdl_name,
)

# Instance type recorded for the model that the step creates.
inputs = CreateModelInput(instance_type="ml.m5.xlarge")

step_model_create = CreateModelStep(
    inputs=inputs,
    model=model,
    name="CreateModelStep",
)

In [10]:
# Model package group that receives every registered version. A fixed group
# name is used rather than a per-run, timestamped one.
mpg_name = "MCH-Content-Models"

# Register the trained artifact as a new model package version. The approval
# status comes from the "ModelApprovalStatus" pipeline parameter.
step_model_registration = RegisterModel(
    name="RegisterModelStep",
    model_package_group_name=mpg_name,
    approval_status=model_approval_status,
    estimator=content_estimator,
    model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
    content_types=["application/json"],
    response_types=["application/json"],
    inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
    transform_instances=["ml.m5.xlarge"],
)

In [11]:
# The inference script is read from s3://<bucket>/code/scoring/inference.py.
# NOTE(review): this cell does not upload it; the object is assumed to exist
# already. It can be (re)uploaded with:
# s3_client.upload_file(Filename=inference_file_path, Bucket=bucket, Key=f"{primary_prefix}/{inference_file_path}")
primary_prefix = "code"
inference_file_path = "scoring/inference.py"
inference_prefix = "inference"

# Fixed name of the endpoint, forwarded to the deployment script.
endpoint_name = "mch-artwork-content-ep-2"

# Processor (scikit-learn 1.0-1 container, one ml.m5.xlarge instance) that
# runs the deployment script as a processing job.
deployment_processor = SKLearnProcessor(
    sagemaker_session=sagemaker_session,
    base_job_name=f"{prefix}-deploy",
    instance_count=1,
    instance_type="ml.m5.xlarge",
    role=sm_role,
    framework_version="1.0-1",
)

# Processing step that executes the local script `deploy.py`. The inference
# script is mounted under /opt/ml/processing/input and the step declares an
# "endpoint_arn" output read from /opt/ml/processing/endpoint_arn.
step_deploy_model = ProcessingStep(
    name="DeployContentModel",
    processor=deployment_processor,
    code="deploy.py",
    inputs=[
        ProcessingInput(
            source=f"s3://{bucket}/{primary_prefix}/{inference_file_path}",
            destination="/opt/ml/processing/input",
        ),
    ],
    outputs=[
        ProcessingOutput(
            output_name="endpoint_arn",
            source="/opt/ml/processing/endpoint_arn",
        ),
    ],
    # Command-line arguments for deploy.py, written as flag/value pairs.
    job_arguments=[
        "--model_data", step_model_create.model.model_data,
        "--inference_prefix", inference_prefix,
        "--sm_role", sm_role,
        "--endpoint_name", endpoint_name,
    ],
)

In [12]:
pipeline_name = "artwork-content-pipeline-demo"

# Steps in the order they are declared in this notebook.
pipeline_steps = [
    step_train,
    step_model_create,
    step_model_registration,
    step_deploy_model,
]

# Assemble the pipeline; "ModelApprovalStatus" is its only parameter.
pipeline = Pipeline(
    sagemaker_session=sagemaker_session,
    steps=pipeline_steps,
    parameters=[model_approval_status],
    name=pipeline_name,
)

In [13]:
# Create or update ("upsert") the pipeline definition using the execution
# role. The call is the cell's last expression, so the service response
# (including the PipelineArn) is displayed as the cell output.
pipeline.upsert(role_arn=sm_role)

No finished training job found associated with this estimator. Please make sure this estimator is only used for building workflow config


{'PipelineArn': 'arn:aws:sagemaker:us-east-1:791574662255:pipeline/artwork-content-pipeline-demo',
 'ResponseMetadata': {'RequestId': '4c425c27-6701-4461-803c-e8043d7b5369',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '4c425c27-6701-4461-803c-e8043d7b5369',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '97',
   'date': 'Wed, 25 Jan 2023 12:22:07 GMT'},
  'RetryAttempts': 0}}

In [14]:
# Start a pipeline run; no parameter overrides are passed, so the declared
# defaults apply.
execution = pipeline.start()
# Block until the run has finished before inspecting it.
execution.wait()
# Last expression of the cell: display the description of the execution.
execution.describe()

{'PipelineArn': 'arn:aws:sagemaker:us-east-1:791574662255:pipeline/artwork-content-pipeline-demo',
 'PipelineExecutionArn': 'arn:aws:sagemaker:us-east-1:791574662255:pipeline/artwork-content-pipeline-demo/execution/upfny1gkb7nh',
 'PipelineExecutionDisplayName': 'execution-1674649328044',
 'PipelineExecutionStatus': 'Succeeded',
 'PipelineExperimentConfig': {'ExperimentName': 'artwork-content-pipeline-demo',
  'TrialName': 'upfny1gkb7nh'},
 'CreationTime': datetime.datetime(2023, 1, 25, 12, 22, 7, 971000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2023, 1, 25, 12, 34, 37, 926000, tzinfo=tzlocal()),
 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:us-east-1:791574662255:user-profile/d-u9ximrvnxnyj/vchitrakathi-dminc-com-f51',
  'UserProfileName': 'vchitrakathi-dminc-com-f51',
  'DomainId': 'd-u9ximrvnxnyj'},
 'LastModifiedBy': {'UserProfileArn': 'arn:aws:sagemaker:us-east-1:791574662255:user-profile/d-u9ximrvnxnyj/vchitrakathi-dminc-com-f51',
  'UserProfileName': 'vchi

In [15]:
# Clean-up: uncomment to delete the pipeline once it is no longer needed.
# pipeline.delete()

In [16]:
from sagemaker.lineage.visualizer import LineageTableVisualizer

# Pause between consecutive lineage queries.
# NOTE(review): presumably there to avoid API throttling - confirm.
LINEAGE_QUERY_PAUSE_SECONDS = 5

# Show the lineage table of every step of the finished execution, iterating
# over the step list in reverse of the order returned by list_steps().
viz = LineageTableVisualizer(sagemaker_session)
for execution_step in reversed(execution.list_steps()):
    print(execution_step)
    lineage_table = viz.show(pipeline_execution_step=execution_step)
    if lineage_table is None:
        # Some steps come back without a lineage table; say so explicitly
        # instead of rendering a bare "None".
        print("No lineage associations recorded for this step.")
    else:
        display(lineage_table)
    time.sleep(LINEAGE_QUERY_PAUSE_SECONDS)

{'StepName': 'TrainingStep', 'StartTime': datetime.datetime(2023, 1, 25, 12, 22, 9, 207000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2023, 1, 25, 12, 26, 31, 583000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 0, 'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:791574662255:training-job/pipelines-upfny1gkb7nh-TrainingStep-FIoK2S391w'}}}


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...l-bucket/data/content_ovr_anonymised.csv,Input,DataSet,ContributedTo,artifact
1,79157...mazonaws.com/artwork-content-repo:latest,Input,Image,ContributedTo,artifact
2,s3://...iningStep-FIoK2S391w/output/model.tar.gz,Output,Model,Produced,artifact


{'StepName': 'CreateModelStep', 'StartTime': datetime.datetime(2023, 1, 25, 12, 26, 32, 425000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2023, 1, 25, 12, 26, 33, 640000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 0, 'Metadata': {'Model': {'Arn': 'arn:aws:sagemaker:us-east-1:791574662255:model/pipelines-upfny1gkb7nh-createmodelstep-yhdvzbdih1'}}}


None

{'StepName': 'RegisterModelStep-RegisterModel', 'StartTime': datetime.datetime(2023, 1, 25, 12, 26, 32, 425000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2023, 1, 25, 12, 26, 33, 314000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 0, 'Metadata': {'RegisterModel': {'Arn': 'arn:aws:sagemaker:us-east-1:791574662255:model-package/mch-content-models/128'}}}


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...iningStep-FIoK2S391w/output/model.tar.gz,Input,Model,ContributedTo,artifact
1,79157...mazonaws.com/artwork-content-repo:latest,Input,Image,ContributedTo,artifact
2,mch-content-models-128-Approved-1674649593-aws...,Input,Approval,ContributedTo,action
3,MCH-Content-Models-1672381109-aws-model-packag...,Output,ModelGroup,AssociatedWith,context


{'StepName': 'DeployContentModel', 'StartTime': datetime.datetime(2023, 1, 25, 12, 26, 32, 425000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2023, 1, 25, 12, 34, 37, 427000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'AttemptCount': 0, 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:791574662255:processing-job/pipelines-upfny1gkb7nh-deploycontentmodel-infhoswxmq'}}}


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...b01c677f38429da1aac/input/code/deploy.py,Input,DataSet,ContributedTo,artifact
1,s3://...t-trial-bucket/code/scoring/inference.py,Input,DataSet,ContributedTo,artifact
2,68331...com/sagemaker-scikit-learn:1.0-1-cpu-py3,Input,Image,ContributedTo,artifact
3,s3://...h/DeployContentModel/output/endpoint_arn,Output,DataSet,Produced,artifact
