In [1]:
import sagemaker
import boto3

import numpy as np

In [2]:
profile_name = 'crayon-site'

boto3.setup_default_session(profile_name=profile_name)

boto_session = boto3.Session(profile_name=profile_name) # specify your local aws profile
sagemaker_session = sagemaker.Session(boto_session)
SAGEMAKER_ROLE = "AmazonSageMaker-ExecutionRole-20200714T182988"

account_id = boto_session.client("sts").get_caller_identity().get("Account")
region_name = "us-east-2"
ecr_repository = "sitecli-inference-preprocessing-container-v0"
tag = ':latest'

processing_repository_uri = "{}.dkr.ecr.{}.amazonaws.com/{}".format(
    account_id, region_name, ecr_repository + tag
)

In [3]:
!aws ecr get-login-password --region $region_name | docker login --username AWS --password-stdin "{account_id}".dkr.ecr."{region_name}".amazonaws.com
!aws ecr create-repository --repository-name $ecr_repository --profile $profile_name

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded

An error occurred (RepositoryAlreadyExistsException) when calling the CreateRepository operation: The repository with name 'sitecli-inference-preprocessing-container-v0' already exists in the registry with id '395166463292'


In [4]:
!docker build -t $ecr_repository docker

Sending build context to Docker daemon  2.048kB
Step 1/8 : FROM python:3.7
 ---> 11c6e5fd966a
Step 2/8 : ARG PIP_NO_CACHE_DIR=1
 ---> Using cache
 ---> 3193c1139681
Step 3/8 : RUN apt-get -y update
 ---> Using cache
 ---> c91fc3394ac2
Step 4/8 : RUN apt-get -y upgrade
 ---> Using cache
 ---> ab04bac1ac8f
Step 5/8 : RUN apt-get install -y libglib2.0-0 libsm6 libxext6 libxrender-dev libgl1-mesa-glx git
 ---> Using cache
 ---> eb1bded5b0e6
Step 6/8 : RUN pip3 install pandas==0.25.3 scikit-learn==0.21.3 numpy Pillow opencv-python boto3 tqdm cython
 ---> Using cache
 ---> 5b1eb1c2de72
Step 7/8 : ENV PYTHONUNBUFFERED=TRUE
 ---> Using cache
 ---> 5b938faf2bc3
Step 8/8 : ENTRYPOINT ["python3"]
 ---> Using cache
 ---> cf922c68f342
Successfully built cf922c68f342
Successfully tagged sitecli-inference-preprocessing-container-v0:latest


In [5]:
!docker tag {ecr_repository + tag} $processing_repository_uri
!docker push $processing_repository_uri

The push refers to repository [395166463292.dkr.ecr.us-east-2.amazonaws.com/sitecli-inference-preprocessing-container-v0]

[1B59731650: Preparing 
[1Be398039c: Preparing 
[1B3c45f2b6: Preparing 
[1B2d1b54ad: Preparing 
[1B28556ea3: Preparing 
[1Bbbf2ef8c: Preparing 
[1B0e907f05: Preparing 
[1Be27dc3b8: Preparing 
[1Bc5106335: Preparing 
[1Bf5e22660: Preparing 
[1B96232ed8: Preparing 
[1B0d2b5482: Preparing 
[5Bc5106335: Pushed   520.4MB/509.9MB[2K[12A[2K[10A[2K[12A[2K[10A[2K[12A[2K[10A[2K[12A[2K[10A[2K[12A[2K[13A[2K[9A[2K[12A[2K[11A[2K[12A[2K[10A[2K[9A[2K[12A[2K[12A[2K[9A[2K[12A[2K[10A[2K[13A[2K[10A[2K[12A[2K[8A[2K[12A[2K[10A[2K[10A[2K[12A[2K[13A[2K[13A[2K[13A[2K[9A[2K[12A[2K[13A[2K[12A[2K[13A[2K[12A[2K[13A[2K[13A[2K[12A[2K[13A[2K[12A[2K[7A[2K[12A[2K[6A[2K[12A[2K[13A[2K[12A[2K[10A[2K[7A[2K[13A[2K[6A[2K[13A[2K[12A[2K[13A[2K[6A[2K[13A[2K[6A[2K[13A[2K[12A

In [13]:
processing_repository_uri

'395166463292.dkr.ecr.us-east-2.amazonaws.com/sitecli-inference-preprocessing-container-v0:latest'

In [6]:
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput

script_processor = ScriptProcessor(
    command=["python3"],
    image_uri=processing_repository_uri,
    role=SAGEMAKER_ROLE,
    instance_count=1,
    instance_type="ml.r5.large",
    sagemaker_session=sagemaker_session,
)

In [14]:
s3 = boto_session.resource("s3")

In [20]:
import re

In [79]:
path = "s3://afdf-dfad.dfa/rajfasd/f.png"

In [80]:
pattern = "s3://([a-z0-9-.]+)/([\x00-\x7F]+)"

In [82]:
re.search(pattern, path)[1]

'afdf-dfad.dfa'

In [83]:
import datetime

In [96]:
wtl = datetime.datetime.utcnow()

In [97]:
wtl

datetime.datetime(2020, 9, 15, 1, 3, 56, 52329)

In [99]:
wtl.strftime("%Y-%m-%d-%H-%M-%S-%f")

'2020-09-15-01-03-56-052329'

In [18]:
wtl = s3.Object('st-crayon', 'TestSite/ri4386_chicagoil_DBM6891680292_2020.tif').load()

ValueError: Required parameter key not set

In [16]:
wtl

In [100]:
script_processor.run(
    code="preprocess.py",
    inputs=[
        ProcessingInput(
            source="s3://st-crayon-dev/cli/inference/raw_input/ri4386_chicagoil_DBM6891680292_2020.tif",
            destination="/opt/ml/processing/input/data/",
        )
    ],
    outputs=[
        ProcessingOutput(
            source="/opt/ml/processing/output/test/",
            destination=f"s3://st-crayon-dev/cli/inference/input_crops",
            s3_upload_mode="Continuous",
        ),
    ],
    arguments=[
        "crop_width",
        "896",
        "crop_height",
        "896",
        "input_width",
        "224",
        "input_height",
        "224",
        "overlap_factor",
        "3",
    ],
)


Job Name:  sitecli-inference-preprocessing-contain-2020-09-15-02-07-08-976
Inputs:  [{'InputName': 'input-1', 'S3Input': {'S3Uri': 's3://st-crayon-dev/cli/inference/raw_input/ri4386_chicagoil_DBM6891680292_2020.tif', 'LocalPath': '/opt/ml/processing/input/data/', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'code', 'S3Input': {'S3Uri': 's3://sagemaker-us-east-2-395166463292/sitecli-inference-preprocessing-contain-2020-09-15-02-07-08-976/input/code/preprocess.py', 'LocalPath': '/opt/ml/processing/input/code', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]
Outputs:  [{'OutputName': 'output-1', 'S3Output': {'S3Uri': 's3://st-crayon-dev/cli/inference/input_crops', 'LocalPath': '/opt/ml/processing/output/test/', 'S3UploadMode': 'Continuous'}}]
........................
..

In [101]:
str(5756)

'5756'