## Imports and Setup

In [1]:
import sagemaker
import boto3
import time

from PIL import Image
import numpy as np
import io

# --- Configuration ---------------------------------------------------------
# Named AWS CLI profile to prefer; also passed to the `aws` shell commands below.
profile = 'crayon-site'
region_name='us-east-2'
# Default bucket for the SageMaker session; also used to build the S3 URIs
# of the processing job's input and output.
bucket = 'st-crayon-dev'
# Forwarded to the processing script as the `s3_input_key_prefix` argument.
prefix = 'sagemaker/labelbox/'
# IAM role handed to the ScriptProcessor.
role = 'arn:aws:iam::395166463292:role/service-role/AmazonSageMaker-ExecutionRole-20200714T182988'

from botocore.exceptions import ProfileNotFound

# Prefer the named profile, but fall back to boto3's default credential chain
# when it is not configured on this machine. `session_profile` records which
# one is in effect so the explicit Session below does not re-raise
# ProfileNotFound after the fallback message has been printed.
try:
    boto3.setup_default_session(profile_name=profile)
    session_profile = profile
except ProfileNotFound:
    print("crayon-site profile not found. Using default aws profile.")
    session_profile = None


# profile_name=None makes boto3 resolve credentials through its default chain.
session = boto3.session.Session(profile_name=session_profile, region_name=region_name)
sess = sagemaker.Session(session, default_bucket=bucket)
sagemaker_client = session.client('sagemaker')
# Account id of the resolved credentials; used to build the ECR registry host.
account_id = session.client('sts').get_caller_identity().get('Account')

In [2]:
# Local image name and tag; `tag` carries its own leading ':' so the two can
# simply be concatenated into "<repository>:<tag>".
ecr_repository = 'ss-processing-container-v0'
tag = ':sitetools'
# Fully qualified ECR reference: <account>.dkr.ecr.<region>.amazonaws.com/<repository>:<tag>
processing_repository_uri = f'{account_id}.dkr.ecr.{region_name}.amazonaws.com/{ecr_repository}{tag}'

## Creates an Amazon Elastic Container Registry (Amazon ECR) repository

This step requires the AWS CLI and Docker to be installed locally. It logs Docker in to Amazon ECR and creates the repository that will store the Docker image the script processor runs.

NOTE: This only needs to be done once!

In [5]:
!aws ecr get-login-password --region $region_name | docker login --username AWS --password-stdin "{account_id}".dkr.ecr."{region_name}".amazonaws.com
!aws ecr create-repository --repository-name $ecr_repository --profile $profile

Sending build context to Docker daemon   2.56kB
Step 1/7 : FROM python:3.7-slim-buster
3.7-slim-buster: Pulling from library/python

[1B52930446: Pulling fs layer 
[1Bb58d08e6: Pulling fs layer 
[1B8cd499d2: Pulling fs layer 
[1B81fc14b2: Pulling fs layer 
[1BDigest: sha256:d37be9a444493e0a542a6d6d7465ac12f485b3c5bb4e40c69be72adfe3d8db6a4A[2K[5A[2K[3A[2K[3A[2K[3A[2K[5A[2K[5A[2K[3A[2K[5A[2K[3A[2K[3A[2K[3A[2K[3A[2K[5A[2K[5A[2K[5A[2K[1A[2K[5A[2K[1A[2K[5A[2K[1A[2K[1A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[4A[2K[4A[2K[4A[2K[4A[2K[3A[2K[3A[2K[3A[2K[3A[2K[3A[2K[3A[2K[2A[2K[1A[2K[1A[2K[1A[2K[1A[2K
Status: Downloaded newer image for python:3.7-slim-buster
 ---> f6d88f1a6438
Step 2/7 : RUN apt-get -y update
 ---> Running in f7f552a91293
Get:1 http://deb.debian.org/debian bu

Selecting previously unselected package x11proto-core-dev.
Preparing to unpack .../16-x11proto-core-dev_2018.4-4_all.deb ...
Unpacking x11proto-core-dev (2018.4-4) ...
Selecting previously unselected package libxau-dev:amd64.
Preparing to unpack .../17-libxau-dev_1%3a1.0.8-1+b2_amd64.deb ...
Unpacking libxau-dev:amd64 (1:1.0.8-1+b2) ...
Selecting previously unselected package libxdmcp-dev:amd64.
Preparing to unpack .../18-libxdmcp-dev_1%3a1.1.2-3_amd64.deb ...
Unpacking libxdmcp-dev:amd64 (1:1.1.2-3) ...
Selecting previously unselected package xtrans-dev.
Preparing to unpack .../19-xtrans-dev_1.3.5-1_all.deb ...
Unpacking xtrans-dev (1.3.5-1) ...
Selecting previously unselected package libxcb1-dev:amd64.
Preparing to unpack .../20-libxcb1-dev_1.13.1-2_amd64.deb ...
Unpacking libxcb1-dev:amd64 (1.13.1-2) ...
Selecting previously unselected package libx11-dev:amd64.
Preparing to unpack .../21-libx11-dev_2%3a1.6.7-1_amd64.deb ...
Unpacking libx11-dev:amd64 (2:1.6.7-1) ...
Selecting previo

## Build the container using the docker command and Push to ECR

Building the container and pushing into Amazon ECR.

NOTE: This only needs to be done once!

In [None]:
!docker build -t $ecr_repository docker

In [6]:
!docker tag {ecr_repository + tag} $processing_repository_uri
!docker push $processing_repository_uri

Login Succeeded
{
    "repository": {
        "repositoryUri": "395166463292.dkr.ecr.us-east-2.amazonaws.com/ss-processing-container-v2", 
        "imageScanningConfiguration": {
            "scanOnPush": false
        }, 
        "registryId": "395166463292", 
        "imageTagMutability": "MUTABLE", 
        "repositoryArn": "arn:aws:ecr:us-east-2:395166463292:repository/ss-processing-container-v2", 
        "repositoryName": "ss-processing-container-v2", 
        "createdAt": 1596720223.0
    }
}
The push refers to repository [395166463292.dkr.ecr.us-east-2.amazonaws.com/ss-processing-container-v2]

[1B6dbe0cfa: Preparing 
[1B6c9f4bbb: Preparing 
[1B042e525f: Preparing 
[1B95c6e59b: Preparing 
[1B4143095c: Preparing 
[1B6fb81c22: Preparing 
[1Bba74fd9a: Preparing 
[1Be2403063: Preparing 
[9B6dbe0cfa: Pushed   522.8MB/515.6MB2K[6A[2K[9A[2K[8A[2K[6A[2K[8A[2K[5A[2K[5A[2K[6A[2K[7A[2K[4A[2K[9A[2K[5A[2K[4A[2K[6A[2K[9A[2K[3A[2K[3A[2K[8A[2K[5A[

## Set up the ScriptProcessor from the Amazon SageMaker Python SDK to run the script.

Set up the ScriptProcessor by pointing it at the Docker image we pushed to ECR and specifying the instance count and instance type it will run on. The `endpoint` is passed to the script as an argument when the job is run.

In [3]:
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput

# Processor that runs a script with `python3` inside the image pushed to ECR,
# on a single ml.r5.large instance, using the session and role configured above.
script_processor = ScriptProcessor(
    command=['python3'],
    image_uri=processing_repository_uri,
    role=role,
    instance_count=1,
    instance_type='ml.r5.large',
    sagemaker_session=sess,
    # NOTE(review): 'endpointvalues' looks like a placeholder; the actual
    # endpoint name is supplied through the job arguments -- confirm.
    env={'endpoint': 'endpointvalues'},
)

In [4]:
# Look up the SageMaker endpoints visible to this session and use the first
# one returned as the inference target for the processing job.
endpoints = sagemaker_client.list_endpoints()
endpoint_names = [entry['EndpointName'] for entry in endpoints['Endpoints']]
# Fail with an actionable message instead of a bare IndexError when the
# account has no endpoints.
if not endpoint_names:
    raise RuntimeError("No SageMaker endpoints found; deploy an endpoint before running the processing job.")
endpoint = endpoint_names[0]
print(endpoint_names, endpoint)

['fcn-512-new', 'psp-1024-new', 'psp-old-test'] fcn-512-new


## Run the script in the docker container
You can also use existing Docker images, including images that you run on other platforms, such as Kubernetes.
Whatever the script writes to `/opt/ml/processing/output/train` inside the container is uploaded to S3 as the script processor's `output-1` output.

In [5]:
%%time
script_processor.run(
    code="inference_script.py",
    inputs=[
        ProcessingInput(
            source=f"s3://{bucket}/data/raw/imgs/original/testing/images/",
            destination="/opt/ml/processing/input/data",
        )
    ],
    outputs=[
        ProcessingOutput(
            source="/opt/ml/processing/output/train", 
            destination=f"s3://{bucket}/data/raw/imgs/original/testing/anno_{endpoint}-chip512",
            s3_upload_mode="Continuous"),
            
    ],
    arguments=[
        "s3_input_bucket",
        bucket,
        "s3_input_key_prefix",
        prefix,
        "endpoint",
        endpoint,
        "chipsize",
        "512",
        "verbose",
        "True"
    ],
)

Parameter 'session' will be renamed to 'sagemaker_session' in SageMaker Python SDK v2.



Job Name:  ss-processing-container-v0-2020-08-20-22-50-07-664
Inputs:  [{'InputName': 'input-1', 'S3Input': {'S3Uri': 's3://st-crayon-dev/data/raw/imgs/original/testing/images/', 'LocalPath': '/opt/ml/processing/input/data', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'code', 'S3Input': {'S3Uri': 's3://st-crayon-dev/ss-processing-container-v0-2020-08-20-22-50-07-664/input/code/inference_script.py', 'LocalPath': '/opt/ml/processing/input/code', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]
Outputs:  [{'OutputName': 'output-1', 'S3Output': {'S3Uri': 's3://st-crayon-dev/data/raw/imgs/original/testing/anno_fcn-512-new-chip512', 'LocalPath': '/opt/ml/processing/output/train', 'S3UploadMode': 'Continuous'}}]
......................[34m20_KY_1317_DBM8839393143_-_Mapping_-_DBI4447445465_DBI1861-1133894-DJI_0760 (3648, 547