In [36]:
import sagemaker
from sagemaker import get_execution_role

role = get_execution_role()
sess = sagemaker.Session()

default_bucket = sess.default_bucket() # or use your own custom bucket name
account = sess.account_id()
base_job_prefix = 'BirdEnd2End'
region = sagemaker.Session().boto_region_name

In [37]:
# image location
s3_input_data = f"s3://{default_bucket}/{base_job_prefix}/unlabeled/images"
# labelled manifest location
s3_input_manifest = f"s3://{default_bucket}/{base_job_prefix}/pipeline/manifest"


output_s3_uri = f's3://{default_bucket}/{base_job_prefix}/scriptprocessor/output/preprocess'

In [38]:
import boto3
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput, Processor
from sagemaker import get_execution_role

import uuid

container_name = "sagemaker-tf-container"

image_uri = "{}.dkr.ecr.{}.amazonaws.com/{}:2.0".format(account, region, container_name)

script_processor = ScriptProcessor(
    base_job_name = base_job_prefix,
                command=['python3'],
                image_uri=image_uri,
                role=role,
                instance_count=1,
                instance_type='ml.m5.xlarge')

In [39]:
script_processor.run(
    code='pipeline/preprocess.py',
    arguments=["--manifest", "manifest", 
               "--images", "images"],
    inputs=[ProcessingInput(source=s3_input_data, 
                            destination="/opt/ml/processing/input/images/"),
            ProcessingInput(source=s3_input_manifest, 
                            destination="/opt/ml/processing/input/manifest/"),
           ],
    outputs=[
            ProcessingOutput(source="/opt/ml/processing/output/train", destination = output_s3_uri +'/train'),
            ProcessingOutput(source="/opt/ml/processing/output/valid", destination = output_s3_uri +'/valid'),
            ProcessingOutput(source="/opt/ml/processing/output/test", destination = output_s3_uri +'/test'),
            ProcessingOutput(source="/opt/ml/processing/output/classes", destination = output_s3_uri +'/classes'),
        ],
                    )


Job Name:  BirdEnd2End-2022-03-17-13-07-55-816
Inputs:  [{'InputName': 'input-1', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-west-2-987720697751/BirdEnd2End/unlabeled/images', 'LocalPath': '/opt/ml/processing/input/images/', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'input-2', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-west-2-987720697751/BirdEnd2End/pipeline/manifest', 'LocalPath': '/opt/ml/processing/input/manifest/', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'code', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-west-2-987720697751/BirdEnd2End-2022-03-17-13-07-55-816/input/code/preprocess.py', 'LocalPath': '/opt/ml/processing/input/code', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3Compress

In [26]:
preprocessing_job_description = script_processor.jobs[-1].describe()

output_config = preprocessing_job_description["ProcessingOutputConfig"]
for output in output_config["Outputs"]:
    output_data = output["S3Output"]["S3Uri"]
    
print(output_data)

s3://sagemaker-us-west-2-987720697751/BirdEnd2End/scriptprocessor/output/preprocess/classes
