## Imports and Setup

In [1]:
import sagemaker
import boto3
import time

from PIL import Image
import numpy as np
import io

# --- Configuration -----------------------------------------------------------
# Named AWS CLI profile, region, S3 bucket/prefix and the SageMaker execution
# role used by the rest of the notebook.
profile = 'crayon-site'
region_name = 'us-east-2'
bucket = 'st-crayon-dev'
prefix = 'sagemaker/labelbox/'
role = 'arn:aws:iam::395166463292:role/service-role/AmazonSageMaker-ExecutionRole-20200714T182988'

from botocore.exceptions import ProfileNotFound

# --- AWS session -------------------------------------------------------------
# Prefer the named profile. If it is not configured on this machine, fall back
# to the default credential chain. The session must be built inside the `try`:
# constructing it with a missing profile raises ProfileNotFound as well, so
# building it unconditionally afterwards would defeat the fallback.
try:
    boto3.setup_default_session(profile_name=profile)
    session = boto3.session.Session(profile_name=profile, region_name=region_name)
except ProfileNotFound:
    print("crayon-site profile not found. Using default aws profile.")
    session = boto3.session.Session(region_name=region_name)

# SageMaker session bound to the boto3 session above, with `bucket` as its default bucket.
sess = sagemaker.Session(session, default_bucket=bucket)
sagemaker_client = session.client('sagemaker')
# Account id of the active credentials; used later to build the ECR image URI.
account_id = session.client('sts').get_caller_identity().get('Account')

## Creates an Amazon Elastic Container Registry (Amazon ECR) repository

This step requires the AWS CLI to be installed locally. It sets up the repository that stores the Docker container used to run the script processor.

NOTE: This only needs to be done once!

In [4]:
# Repository name and image tag of the processing container.
ecr_repository = 'ss-processing-container-python37'
tag = ':latest'
# Fully qualified image URI: <account>.dkr.ecr.<region>.amazonaws.com/<repository><tag>
processing_repository_uri = f'{account_id}.dkr.ecr.{region_name}.amazonaws.com/{ecr_repository + tag}'
processing_repository_uri

'395166463292.dkr.ecr.us-east-2.amazonaws.com/ss-processing-container-python37:latest'

## Build the container using the docker command and Push to ECR

In [5]:
!docker build -t $ecr_repository docker

[1A[1B[0G[?25l[+] Building 0.0s (0/2)                                                         
[?25h[1A[0G[?25l[+] Building 0.2s (2/3)                                                         
[34m => [internal] load .dockerignore                                          0.0s
[0m[34m => => transferring context: 2B                                            0.0s
[0m[34m => [internal] load build definition from Dockerfile                       0.1s
[0m[34m => => transferring dockerfile: 704B                                       0.0s
[0m => [internal] load metadata for docker.io/library/python:3.7              0.1s
[?25h[1A[1A[1A[1A[1A[1A[0G[?25l[+] Building 0.3s (2/3)                                                         
[34m => [internal] load .dockerignore                                          0.0s
[0m[34m => => transferring context: 2B                                            0.0s
[0m[34m => [internal] load build definition from Dockerfile        

In [6]:
!$(aws ecr get-login --region $region_name --registry-ids $account_id --no-include-email --profile $profile)
!aws ecr create-repository --repository-name $ecr_repository --profile $profile

Login Succeeded
{
    "repository": {
        "repositoryArn": "arn:aws:ecr:us-east-2:395166463292:repository/ss-processing-container-python37",
        "registryId": "395166463292",
        "repositoryName": "ss-processing-container-python37",
        "repositoryUri": "395166463292.dkr.ecr.us-east-2.amazonaws.com/ss-processing-container-python37",
        "createdAt": 1598630958.0,
        "imageTagMutability": "MUTABLE",
        "imageScanningConfiguration": {
            "scanOnPush": false
        }
    }
}


Tag the locally built container image and push it to Amazon ECR.

NOTE: This only needs to be done once!

In [7]:
!docker tag {ecr_repository + tag} $processing_repository_uri
!docker push $processing_repository_uri

The push refers to repository [395166463292.dkr.ecr.us-east-2.amazonaws.com/ss-processing-container-python37]

[1B
[1B
[1B
[1B
[1B
[1B
[1B
[1B
[1B
[1B
[1B
[1B
[1B
[12Blatest: digest: sha256:9aee0e655c62893eb0da2d10a9071a649648208494198fad24366fba5fc4416a size: 3272


## Set up the ScriptProcessor from the Amazon SageMaker Python SDK to run the script.

Set up the `ScriptProcessor` by pointing it to the Docker container we created and specifying the instance count and the instance type it will run on. The `endpoint` is passed in as an argument.

In [57]:
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput
# Processor that runs a Python script inside the custom container image
# on a single ml.c5.xlarge instance, using the notebook's SageMaker session.
script_processor = ScriptProcessor(
    command=["python3"],
    image_uri=processing_repository_uri,
    role=role,
    instance_count=1,
    instance_type="ml.c5.xlarge",
    sagemaker_session=sess,
    env={"endpoint": "endpointvalues"},
)

In [58]:
# Only endpoints that are InService can answer inference requests, so restrict
# the listing to those instead of taking whatever endpoint happens to come first.
endpoints = sagemaker_client.list_endpoints(StatusEquals='InService')
if not endpoints['Endpoints']:
    # Fail with an actionable message rather than a bare IndexError.
    raise RuntimeError(
        "No InService SageMaker endpoint found in region {}; "
        "deploy an endpoint before running the processing job.".format(region_name)
    )
# Use the first endpoint returned by the service.
endpoint = endpoints['Endpoints'][0]['EndpointName']
print(endpoint)

lss-all-ml-p2-xlarge


## Run the script in the docker container
You can also use existing Docker images, including images that you run on other platforms, such as Kubernetes.
The results are written to the training folder as `output-1` for the script processor.

In [59]:
%%time
script_processor.run(
    code="inference_script.py",
    inputs=[
        ProcessingInput(
            source=f"s3://{bucket}/data/raw/imgs/original/testing/",
            destination="/opt/ml/processing/input/data",
        )
    ],
    outputs=[
        ProcessingOutput(
            source="/opt/ml/processing/output/train", 
            destination=f"s3://{bucket}/data/raw/imgs/original/testing/anno_{endpoint}-chip1024-crf",
            s3_upload_mode="Continuous"),
            
    ],
    arguments=[
        "s3_input_bucket",
        bucket,
        "s3_input_key_prefix",
        prefix,
        "endpoint",
        endpoint,
        "chipsize",
        "1024"
    ],
)

darray'>[0m
[34m<class 'numpy.ndarray'>[0m
[34m3[0m
[34m<class 'numpy.ndarray'>[0m
[34m<class 'numpy.ndarray'>[0m
[34m4[0m
[34m<class 'numpy.ndarray'>[0m
[34m<class 'numpy.ndarray'>[0m
[34m3[0m
[34m<class 'numpy.ndarray'>[0m
[34m<class 'numpy.ndarray'>[0m
[34m5[0m
[34m<class 'numpy.ndarray'>[0m
[34m<class 'numpy.ndarray'>[0m
[34m5[0m
[34m<class 'numpy.ndarray'>[0m
[34m<class 'numpy.ndarray'>[0m
[34m2[0m
[34m<class 'numpy.ndarray'>[0m
[34m<class 'numpy.ndarray'>[0m
[34m2[0m
[34m<class 'numpy.ndarray'>[0m
[34m<class 'numpy.ndarray'>[0m
[34m2[0m
[34m<class 'numpy.ndarray'>[0m
[34m<class 'numpy.ndarray'>[0m
[34m3[0m
[34m<class 'numpy.ndarray'>[0m
[34m<class 'numpy.ndarray'>[0m
[34m4[0m
[34m<class 'numpy.ndarray'>[0m
[34m<class 'numpy.ndarray'>[0m
[34m5[0m
[34m<class 'numpy.ndarray'>[0m
[34m<class 'numpy.ndarray'>[0m
[34m5[0m
[34m<class 'numpy.ndarray'>[0m
[34m<class 'numpy.ndarray'>[0m
[34m4[0m
[34m<class 'numpy.

KeyboardInterrupt: 