# Pushing the Processing Container Image to ECR

In [20]:
import boto3

account_id = boto3.client('sts').get_caller_identity().get('Account')
ecr_repository = 'sagemaker-processing-container'
tag = ':latest'
region = boto3.session.Session().region_name

processing_repository_uri = '{}.dkr.ecr.{}.amazonaws.com/{}'.format(account_id, region, ecr_repository + tag)

# Create ECR repository and push docker image
!chmod +x docker/preprocessing.py
!docker build -t $ecr_repository docker
!$(aws ecr get-login --region $region --registry-ids $account_id --no-include-email)
#!aws ecr create-repository --repository-name $ecr_repository
!docker tag {ecr_repository + tag} $processing_repository_uri
!docker push $processing_repository_uri

Sending build context to Docker daemon  7.168kB
Step 1/6 : FROM python:3.7-slim-buster
 ---> d7ee20941226
Step 2/6 : RUN pip3 install pandas==0.25.3 scikit-learn==0.21.3
 ---> Using cache
 ---> 6b80a1c8ca03
Step 3/6 : ENV PYTHONUNBUFFERED=TRUE
 ---> Using cache
 ---> d090efbba3c5
Step 4/6 : ENV PATH="/opt/ml/code:${PATH}"
 ---> Using cache
 ---> 73164899c9de
Step 5/6 : COPY preprocessing.py /opt/ml/code/preprocessing.py
 ---> cb1f1ef6a7a6
Step 6/6 : WORKDIR /opt/ml/code
 ---> Running in 8582d1e0a1fe
Removing intermediate container 8582d1e0a1fe
 ---> 7d6f796514c8
Successfully built 7d6f796514c8
Successfully tagged sagemaker-processing-container:latest
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
The push refers to repository [809912564797.dkr.ecr.us-east-2.amazonaws.com/sagemaker-processing-container]

[1B43add773: Preparing 
[1Bbe57dad2: Preparing 
[1B14420313: Preparing 
[1Bfa00b93a: Preparing 
[1B0547ed37: Preparing 
[1B837eded8

# Defining the Script Processor Instance

In [7]:
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput
from sagemaker import get_execution_role

# Execution role returned by the SageMaker SDK for this session.
role = get_execution_role()

# Processor that runs a script with `python3` inside the custom image
# referenced by `processing_repository_uri`, on a single ml.m5.xlarge instance.
script_processor = ScriptProcessor(
    image_uri=processing_repository_uri,
    command=['python3'],
    role=role,
    instance_type='ml.m5.xlarge',
    instance_count=1,
)

# Running the Script Processor

In [10]:
# Raw training CSV that is made available to the container.
input_data = 's3://slytherins-test/Train.csv'

# The input is mounted at /opt/ml/processing/input inside the container.
processing_inputs = [
    ProcessingInput(source=input_data, destination='/opt/ml/processing/input'),
]

# Whatever the script writes to the train/ and test/ directories is uploaded to the bucket.
processing_outputs = [
    ProcessingOutput(source='/opt/ml/processing/train', destination='s3://slytherins-test/'),
    ProcessingOutput(source='/opt/ml/processing/test', destination='s3://slytherins-test/'),
]

script_processor.run(
    code='preprocessing.py',
    inputs=processing_inputs,
    outputs=processing_outputs,
)


Job Name:  sagemaker-processing-container-2020-05-02-06-43-45-399
Inputs:  [{'InputName': 'input-1', 'S3Input': {'S3Uri': 's3://slytherins-test/Train.csv', 'LocalPath': '/opt/ml/processing/input', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'code', 'S3Input': {'S3Uri': 's3://sagemaker-us-east-2-809912564797/sagemaker-processing-container-2020-05-02-06-43-45-399/input/code/preprocessing.py', 'LocalPath': '/opt/ml/processing/input/code', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]
Outputs:  [{'OutputName': 'output-1', 'S3Output': {'S3Uri': 's3://slytherins-test/', 'LocalPath': '/opt/ml/processing/train', 'S3UploadMode': 'EndOfJob'}}, {'OutputName': 'output-2', 'S3Output': {'S3Uri': 's3://slytherins-test/', 'LocalPath': '/opt/ml/processing/test', 'S3UploadMode': 'EndOfJob'}}]
.................[34mReceived argument

# Getting the Processing Job Description

In [21]:
# Describe the most recently launched processing job.
preprocessing_job_description = script_processor.jobs[-1].describe()

# Map the outputs we care about to their S3 URIs. Only the matching entries are
# dereferenced, so outputs with other names are ignored.
output_config = preprocessing_job_description['ProcessingOutputConfig']
s3_uri_by_output_name = {
    output['OutputName']: output['S3Output']['S3Uri']
    for output in output_config['Outputs']
    if output['OutputName'] in ('output-1', 'output-2')
}

# Fail loudly when an expected output is absent; otherwise the variables below
# would stay undefined or keep a stale value from an earlier run of this kernel.
missing_outputs = sorted({'output-1', 'output-2'} - set(s3_uri_by_output_name))
if missing_outputs:
    raise KeyError('Processing job description has no output(s) named: {}'.format(
        ', '.join(missing_outputs)))

preprocessed_training_data = s3_uri_by_output_name['output-1']
preprocessed_test_data = s3_uri_by_output_name['output-2']

# Inspecting the Preprocessed Training Features

In [23]:
import pandas as pd
# Join prefix and file name with exactly one '/', whether or not the output
# URI reported by the job ends with a slash.
train_features_uri = preprocessed_training_data.rstrip('/') + '/train_features.csv'

# Preview only: read the first 10 rows; the file is read without a header row.
training_features = pd.read_csv(train_features_uri, nrows=10, header=None)
print('Training features shape: {}'.format(training_features.shape))
training_features.head(n=10)

Training features shape: (10, 11)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0.071924,4.22395,-0.56897,-1.532846,0.047385,-0.738147,1.371418,-0.25459,0.799954,-1.369334,-1.508289
1,-0.619814,0.075491,1.96928,0.736822,0.886662,-0.738147,-0.766479,0.450371,0.799954,-0.138882,-0.252658
2,0.751946,-0.350031,-0.232154,1.09519,-1.366252,1.354743,-0.528935,-0.959551,0.799954,-0.138882,-0.252658
3,0.071924,-0.335116,-1.224896,-1.532846,-1.695729,-0.738147,1.608963,0.097891,-0.66408,1.091569,2.258603
4,0.964806,1.359713,0.480442,1.334103,-1.145858,-0.738147,-0.291391,-0.607071,-0.66408,1.091569,1.002972
5,1.603384,-0.248602,-1.32966,0.139541,-1.339537,-0.738147,-0.291391,1.507813,-0.66408,-1.369334,-0.252658
6,1.16584,1.553906,-0.752339,-1.293934,-0.164104,-0.738147,-1.479112,-1.312032,-2.128115,1.091569,-0.252658
7,1.556082,-0.977235,0.656289,-1.293934,1.447664,-0.738147,0.421242,-1.312032,-2.128115,1.091569,-0.252658
8,1.319572,-0.075335,0.077869,0.736822,-0.633832,1.354743,-0.528935,0.450371,0.799954,-0.138882,-0.252658
9,-1.623801,-0.786506,0.281015,1.09519,1.064758,-0.738147,-0.291391,-0.959551,0.799954,-0.138882,-0.252658
