In [1]:
import boto3

# Region configured for the current boto3 session.
region = boto3.session.Session().region_name

# AWS account id of the caller, as reported by STS.
account_id = boto3.client('sts').get_caller_identity().get('Account')

# Image name and tag. The tag keeps its leading colon so it can be appended
# directly to the repository name.
ecr_repository = 'sagemaker-processing-container'
tag = ':latest'

# Fully qualified ECR image URI:
#   <account>.dkr.ecr.<region>.amazonaws.com/<repository><tag>
processing_repository_uri = f'{account_id}.dkr.ecr.{region}.amazonaws.com/{ecr_repository}{tag}'


In [2]:
# Fetch the execution role through the SageMaker SDK; it is later passed to
# the ScriptProcessor as `role`.
from sagemaker import get_execution_role
role = get_execution_role()
# Bare expression so the role value is shown as the cell output.
role

'arn:aws:iam::064258348567:role/service-role/AmazonSageMaker-ExecutionRole-20200131T104492'

In [3]:
# Sanity check: show the image URI assembled in the first cell.
processing_repository_uri

'064258348567.dkr.ecr.us-east-1.amazonaws.com/sagemaker-processing-container:latest'

In [4]:
!docker build -t $ecr_repository .

Sending build context to Docker daemon  94.72kB
Step 1/9 : FROM python:3.7-slim-buster
3.7-slim-buster: Pulling from library/python

[1B5a41d630: Pulling fs layer 
[1B8b7b4c32: Pulling fs layer 
[1Ba3eb7bae: Pulling fs layer 
[1Be182f49d: Pulling fs layer 
[1BDigest: sha256:5375725c3c0a0215279c1c5ddb33f91d31f0eb37010140397e5c7e5530073d2c[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[4A[2K[4A[2K[4A[2K[4A[2K[3A[2K[3A[2K[3A[2K[3A[2K[3A[2KPull complete [2A[2K[1A[2K[1A[2K[1A[2K
Status: Downloaded newer image for python:3.7-slim-buster
 ---> e7d86653f62f
Step 2/9 : MAINTAINER ASC "mbod@asc.upenn.edu jmparelman@gmail.com"
 ---> Running in 937350084f48
Removing intermediate container 937350084f48
 ---> ac1207550f87
Step 3/9 : RUN apt-get -y update && apt-get install -y --no-install-recommends         build-essential         python-dev         python3-pip     && pip3 install -U pip
 ---> Running in 0d00bf060

In [5]:
# Show the account id resolved via STS in the first cell.
account_id

'064258348567'

In [6]:
!aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 064258348567.dkr.ecr.us-east-1.amazonaws.com


https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded


In [7]:
!aws ecr create-repository --repository-name $ecr_repository



An error occurred (RepositoryAlreadyExistsException) when calling the CreateRepository operation: The repository with name 'sagemaker-processing-container' already exists in the registry with id '064258348567'


In [8]:
!docker tag {ecr_repository + tag} $processing_repository_uri
!docker push $processing_repository_uri

The push refers to repository [064258348567.dkr.ecr.us-east-1.amazonaws.com/sagemaker-processing-container]

[1B8684f615: Preparing 
[1B41d98a2a: Preparing 
[1Bc60b42ea: Preparing 
[1B04c43c6b: Preparing 
[1B3c6439bc: Preparing 
[1B6bb9ffea: Preparing 
[1B663e622b: Preparing 
[1B845af46d: Preparing 
[7Bc60b42ea: Pushed   656.8MB/647.6MBA[2K[7A[2K[7A[2K[6A[2K[7A[2K[6A[2K[7A[2K[6A[2K[7A[2K[6A[2K[7A[2K[6A[2K[7A[2K[6A[2K[6A[2K[7A[2K[6A[2K[7A[2K[6A[2K[7A[2K[6A[2K[7A[2K[6A[2K[7A[2K[6A[2K[7A[2K[9A[2K[6A[2K[6A[2K[7A[2K[6A[2K[7A[2K[6A[2K[7A[2K[6A[2K[7A[2K[6A[2K[7A[2K[7A[2K[8A[2K[7A[2K[6A[2K[7A[2K[6A[2K[7A[2K[6A[2K[7A[2K[7A[2K[6A[2K[7A[2K[6A[2K[7A[2K[6A[2K[7A[2K[7A[2K[7A[2K[6A[2K[7A[2K[7A[2K[7A[2K[6A[2K[6A[2K[6A[2K[7A[2K[6A[2K[7A[2K[7A[2K[6A[2K[7A[2K[6A[2K[6A[2K[7A[2K[7A[2K[7A[2K[6A[2K[7A[2K[6A[2K[7A[2K[6A[2K[6A[2K[6A[2K[6A[2K

In [9]:
!echo $processing_repository_uri

064258348567.dkr.ecr.us-east-1.amazonaws.com/sagemaker-processing-container:latest


# Can we now run the image?

In [8]:
# NOTE(review): `role_arn` is not referenced by any cell visible here; the
# ScriptProcessor is given `role` instead. The value is an STS assumed-role
# ARN, unlike the IAM role ARN shown as the output of `role`.
# TODO confirm whether this is still needed.
role_arn = 'arn:aws:sts::064258348567:assumed-role/AmazonSageMaker-ExecutionRole-20200131T104492/SageMaker'

In [9]:
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput

# Processor that launches `python3` inside the custom image pushed to ECR,
# on a single ml.m5.2xlarge instance, using the notebook's execution role.
script_processor = ScriptProcessor(
    image_uri=processing_repository_uri,
    command=['python3'],
    role=role,
    instance_type='ml.m5.2xlarge',
    instance_count=1,
)


In [10]:
# Run the pre-processing script as a SageMaker Processing job.
# The S3 raw-data prefix is mapped to /opt/ml/processing/input in the
# container, and whatever the script writes to /opt/ml/processing/output is
# sent to the `dtms` S3 prefix.
# '104' is passed to the script as its only command-line argument
# (presumably the Congress number; confirm against the script).
script_processor.run(
    code='scripts/congress_pre_process.py',
    arguments=['104'],
    inputs=[
        ProcessingInput(
            source='s3://ascsagemaker/JMP_congressional_nmf/raw_data',
            destination='/opt/ml/processing/input',
        ),
    ],
    outputs=[
        ProcessingOutput(
            source='/opt/ml/processing/output',
            destination='s3://ascsagemaker/JMP_congressional_nmf/dtms',
        ),
    ],
)


Job Name:  sagemaker-processing-container-2021-05-05-14-53-53-292
Inputs:  [{'InputName': 'input-1', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://ascsagemaker/JMP_congressional_nmf/raw_data', 'LocalPath': '/opt/ml/processing/input', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'code', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-east-1-064258348567/sagemaker-processing-container-2021-05-05-14-53-53-292/input/code/congress_pre_process.py', 'LocalPath': '/opt/ml/processing/input/code', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]
Outputs:  [{'OutputName': 'output-1', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://ascsagemaker/JMP_congressional_nmf/dtms', 'LocalPath': '/opt/ml/processing/output', 'S3UploadMode': 'EndOfJob'}}]
[34mTraceback (most recent call last):
  File "/opt/ml/processing/inpu

UnexpectedStatusException: Error for Processing job sagemaker-processing-container-2021-05-05-14-53-53-292: Failed. Reason: AlgorithmError: See job logs for more information