In [1]:
import json

import boto3
import sagemaker
import sagemaker.session
from sagemaker.processing import ScriptProcessor
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.steps import ProcessingStep
from sagemaker.workflow.properties import PropertyFile
from sagemaker.processing import Processor, ProcessingInput, ProcessingOutput

In [2]:
# Pipeline configuration: container image, compute resources and AWS context.
# NOTE(review): the image is referenced by the mutable `:latest` tag, so two
# runs of this notebook may execute different container builds.
image_uri = '478704051461.dkr.ecr.us-east-1.amazonaws.com/ons-ds-entityrecognition-pipeline:latest'
instance_type, instance_count = 'ml.m5.large', 1

# AWS context taken from the default boto3 / SageMaker configuration.
region = boto3.Session().region_name
role = sagemaker.get_execution_role()
sagemaker_session = sagemaker.session.Session()

In [3]:
# Script processor shared by every pipeline step: runs the step's script with
# python3 inside the ECR image configured above.
script_processor = ScriptProcessor(
    image_uri=image_uri,
    command=["python3"],
    instance_type=instance_type,
    instance_count=instance_count,
    # SageMaker job names accept only alphanumerics and hyphens; a base name
    # with underscores is rejected when the processor is run directly.
    base_job_name="script-processor-job",
    role=role,
    # Reuse the session created in the configuration cell instead of letting
    # the processor build its own default one.
    sagemaker_session=sagemaker_session,
)

In [4]:
# Workload processing step: runs scripts/process_workload.py and publishes two
# outputs, "predefined_workload" and "inequalities_workload".
#
# Each property file gets its own name: property lookups address a file by
# name, so two files sharing one name cannot be told apart.
predefined_workload_report = PropertyFile(
    name="PredefinedWorkloadReport",
    output_name="predefined_workload",
    path="predefined.json",
)
inequalities_workload_report = PropertyFile(
    name="InequalitiesWorkloadReport",
    output_name="inequalities_workload",
    path="inequalities.json",
)
step_processing_workload = ProcessingStep(
    name="ProcessingWorkload",
    processor=script_processor,
    outputs=[
        ProcessingOutput(
            output_name="predefined_workload",
            source="/opt/ml/processing/predefined_workload",
        ),
        ProcessingOutput(
            output_name="inequalities_workload",
            source="/opt/ml/processing/inequalities_workload",
        ),
    ],
    code="scripts/process_workload.py",
    property_files=[
        predefined_workload_report,
        inequalities_workload_report,
    ],
)

In [5]:
# Training sequence for the predefined workload.
# Reads the "predefined_workload" output of the workload step and registers
# predefined.json from its own "predefined_training" output as a property file.
predefined_workload_uri = (
    step_processing_workload.properties
    .ProcessingOutputConfig
    .Outputs["predefined_workload"]
    .S3Output
    .S3Uri
)
predefined_training_report = PropertyFile(
    name="TrainingReportPredefined",
    output_name="predefined_training",
    path="predefined.json",
)
step_training_predefined = ProcessingStep(
    name="TrainingSequencePredefined",
    processor=script_processor,
    code="scripts/predefined_training_sequence.py",
    inputs=[
        ProcessingInput(
            source=predefined_workload_uri,
            destination="/opt/ml/processing/predefined_workload",
        ),
    ],
    outputs=[
        ProcessingOutput(
            output_name="predefined_training",
            source="/opt/ml/processing/predefined_training",
        ),
    ],
    property_files=[predefined_training_report],
)

In [6]:
# Training sequence for the inequalities workload.
# Reads the "inequalities_workload" output of the workload step and registers
# inequalities.json from its own "inequalities_training" output as a property file.
inequalities_workload_uri = (
    step_processing_workload.properties
    .ProcessingOutputConfig
    .Outputs["inequalities_workload"]
    .S3Output
    .S3Uri
)
inequalities_training_report = PropertyFile(
    name="TrainingReportInequalities",
    output_name="inequalities_training",
    path="inequalities.json",
)
step_training_inequalities = ProcessingStep(
    name="TrainingSequenceInequalities",
    processor=script_processor,
    code="scripts/inequalities_training_sequence.py",
    inputs=[
        ProcessingInput(
            source=inequalities_workload_uri,
            destination="/opt/ml/processing/inequalities_workload",
        ),
    ],
    outputs=[
        ProcessingOutput(
            output_name="inequalities_training",
            source="/opt/ml/processing/inequalities_training",
        ),
    ],
    property_files=[inequalities_training_report],
)

In [7]:
# Deployment sequence: mounts the outputs of both training steps and runs
# scripts/deployment_sequence.py. The step declares no outputs of its own.
predefined_training_uri = (
    step_training_predefined.properties
    .ProcessingOutputConfig
    .Outputs["predefined_training"]
    .S3Output
    .S3Uri
)
inequalities_training_uri = (
    step_training_inequalities.properties
    .ProcessingOutputConfig
    .Outputs["inequalities_training"]
    .S3Output
    .S3Uri
)
deployment_step = ProcessingStep(
    name="DeploymentSequence",
    processor=script_processor,
    code="scripts/deployment_sequence.py",
    inputs=[
        ProcessingInput(
            source=predefined_training_uri,
            destination="/opt/ml/processing/predefined_training",
        ),
        ProcessingInput(
            source=inequalities_training_uri,
            destination="/opt/ml/processing/inequalities_training",
        ),
    ],
)

In [8]:
# Assemble the four processing steps into one pipeline definition.
pipeline_name = 'mpo-ner-entityrecognition-pipeline'
pipeline_steps = [
    step_processing_workload,
    step_training_predefined,
    step_training_inequalities,
    deployment_step,
]
pipeline = Pipeline(name=pipeline_name, steps=pipeline_steps)

In [9]:
# Parse the generated pipeline definition JSON so the cell displays it as a dict.
# (`json` is imported with the other modules in the first cell.)
json.loads(pipeline.definition())

{'Version': '2020-12-01',
 'Metadata': {},
 'Parameters': [],
 'PipelineExperimentConfig': {'ExperimentName': {'Get': 'Execution.PipelineName'},
  'TrialName': {'Get': 'Execution.PipelineExecutionId'}},
 'Steps': [{'Name': 'ProcessingWorkload',
   'Type': 'Processing',
   'Arguments': {'ProcessingResources': {'ClusterConfig': {'InstanceType': 'ml.m5.large',
      'InstanceCount': 1,
      'VolumeSizeInGB': 30}},
    'AppSpecification': {'ImageUri': '478704051461.dkr.ecr.us-east-1.amazonaws.com/ons-ds-entityrecognition-pipeline:latest',
     'ContainerEntrypoint': ['python3',
      '/opt/ml/processing/input/code/process_workload.py']},
    'RoleArn': 'arn:aws:iam::478704051461:role/service-role/AmazonSageMaker-ExecutionRole-20220317T121940',
    'ProcessingInputs': [{'InputName': 'code',
      'AppManaged': False,
      'S3Input': {'S3Uri': 's3://sagemaker-us-east-1-478704051461/ProcessingWorkload-d5787b01ac27dd74c4cd0a8a5f2ff5d2/input/code/process_workload.py',
       'LocalPath': '/op

In [10]:
# Create the pipeline, or update it if it already exists, using the notebook's
# execution role; the service response is shown as the cell output.
upsert_response = pipeline.upsert(role_arn=role)
upsert_response

{'PipelineArn': 'arn:aws:sagemaker:us-east-1:478704051461:pipeline/mpo-ner-entityrecognition-pipeline',
 'ResponseMetadata': {'RequestId': 'dcaf79a8-644a-4d9c-9f0a-73e858faed08',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'dcaf79a8-644a-4d9c-9f0a-73e858faed08',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '102',
   'date': 'Mon, 10 Oct 2022 15:27:02 GMT'},
  'RetryAttempts': 0}}

In [11]:
# Start a new execution of the pipeline and keep the returned handle so the
# following cell can query it.
execution = pipeline.start()

In [17]:
# Fetch the description of the pipeline execution started above and display it.
execution_description = execution.describe()
execution_description

{'PipelineArn': 'arn:aws:sagemaker:us-east-1:478704051461:pipeline/mpo-ner-entityrecognition-pipeline',
 'PipelineExecutionArn': 'arn:aws:sagemaker:us-east-1:478704051461:pipeline/mpo-ner-entityrecognition-pipeline/execution/5o5fl8al48sg',
 'PipelineExecutionDisplayName': 'execution-1665415623802',
 'PipelineExecutionStatus': 'Executing',
 'PipelineExperimentConfig': {'ExperimentName': 'mpo-ner-entityrecognition-pipeline',
  'TrialName': '5o5fl8al48sg'},
 'CreationTime': datetime.datetime(2022, 10, 10, 15, 27, 3, 679000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2022, 10, 10, 15, 27, 3, 679000, tzinfo=tzlocal()),
 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:us-east-1:478704051461:user-profile/d-yzqjj21wgvgn/iavila-tenbu-ons-org-br-6e8',
  'UserProfileName': 'iavila-tenbu-ons-org-br-6e8',
  'DomainId': 'd-yzqjj21wgvgn'},
 'LastModifiedBy': {'UserProfileArn': 'arn:aws:sagemaker:us-east-1:478704051461:user-profile/d-yzqjj21wgvgn/iavila-tenbu-ons-org-br-6e8',
  'User