### Imports 

In [77]:
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker import get_execution_role
from time import gmtime, strftime
import pandas as pd
import sagemaker
import boto3
import time

### 1. Essentials

In [53]:
# S3 bucket and key prefix under which this demo's inputs and outputs live.
BUCKET = "sagemaker-demo-892313895307"
PREFIX = "fruits-clf"

# AWS region used for the SageMaker client and the container image lookup.
REGION = "us-east-1"

In [54]:
# S3 prefix passed to the transform job as its input location.
batch_input = f's3://{BUCKET}/{PREFIX}/batch_test/'
batch_input

's3://sagemaker-demo-892313895307/fruits-clf/batch_test/'

In [55]:
# S3 location passed to the transform job as its output path.
batch_output = f's3://{BUCKET}/{PREFIX}/batch-inference'
batch_output

's3://sagemaker-demo-892313895307/fruits-clf/batch-inference'

In [56]:
# UTC timestamp (YYYY-MM-DD-HH-MM-SS) used as a suffix for resource names.
current_timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())

In [71]:
# Name of the training job whose model artifact is reused below.
TRAINING_JOB_NAME = 'fruits-clf-xgboost-2020-10-21-19-34-32-454'  # COPY THIS FROM THE CONSOLE

# The model and the batch job are both named with the same timestamp suffix.
MODEL_NAME = 'fruits-clf-xgboost-model-{}'.format(current_timestamp)
BATCH_JOB_NAME = 'fruits-clf-xgboost-batch-job-{}'.format(current_timestamp)

# AWS clients: SageMaker is pinned to REGION; S3 uses the default configuration.
s3_client = boto3.client('s3')
sagemaker_client = boto3.client('sagemaker', region_name=REGION)

# SageMaker SDK session and the execution role later passed to create_model.
sagemaker_session = sagemaker.session.Session()
sagemaker_execution_role = get_execution_role()

# Container image URI for the built-in XGBoost algorithm, version 1.0-1, in REGION.
container_uri = get_image_uri(REGION, 'xgboost', '1.0-1')

'get_image_uri' method will be deprecated in favor of 'ImageURIProvider' class in SageMaker Python SDK v2.


### 2. Create a Model object using the name of a previously run training job

In [59]:
# Fetch the training job description; the model artifact location is read from it below.
info = sagemaker_client.describe_training_job(
    TrainingJobName=TRAINING_JOB_NAME,
)
info

{'TrainingJobName': 'fruits-clf-xgboost-2020-10-21-19-34-32-454',
 'TrainingJobArn': 'arn:aws:sagemaker:us-east-1:892313895307:training-job/fruits-clf-xgboost-2020-10-21-19-34-32-454',
 'ModelArtifacts': {'S3ModelArtifacts': 's3://sagemaker-demo-892313895307/fruits-clf/model-artifacts/fruits-clf-xgboost-2020-10-21-19-34-32-454/output/model.tar.gz'},
 'TrainingJobStatus': 'Completed',
 'SecondaryStatus': 'Completed',
 'HyperParameters': {'num_class': '4',
  'num_round': '100',
  'objective': 'multi:softmax'},
 'AlgorithmSpecification': {'TrainingImage': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3',
  'TrainingInputMode': 'File',
  'MetricDefinitions': [{'Name': 'train:mae',
    'Regex': '.*\\[[0-9]+\\].*#011train-mae:([-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?).*'},
   {'Name': 'validation:aucpr',
    'Regex': '.*\\[[0-9]+\\].*#011validation-aucpr:([-+]?[0-9]*\\.?[0-9]+(?:[eE][-+]?[0-9]+)?).*'},
   {'Name': 'train:merror',
    'Regex': '.*\\[[0-9]+\\].*#

In [60]:
# Pull the S3 URL of the trained model archive out of the job description.
model_artifacts = info['ModelArtifacts']
model_artifact_url = model_artifacts['S3ModelArtifacts']
model_artifact_url

's3://sagemaker-demo-892313895307/fruits-clf/model-artifacts/fruits-clf-xgboost-2020-10-21-19-34-32-454/output/model.tar.gz'

In [61]:
# Container definition for create_model: the XGBoost image plus the trained artifact.
primary_container = dict(
    Image=container_uri,
    ModelDataUrl=model_artifact_url,
)

In [62]:
# Register the model with SageMaker under MODEL_NAME.
create_model_args = {
    'ModelName': MODEL_NAME,
    'ExecutionRoleArn': sagemaker_execution_role,
    'PrimaryContainer': primary_container,
}
response = sagemaker_client.create_model(**create_model_args)

In [63]:
# Show the create_model response; it includes the ARN of the new model.
response

{'ModelArn': 'arn:aws:sagemaker:us-east-1:892313895307:model/fruits-clf-xgboost-model-2020-10-21-21-11-16',
 'ResponseMetadata': {'RequestId': 'f7043a4f-7b05-4130-9bee-6013e74d3096',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'f7043a4f-7b05-4130-9bee-6013e74d3096',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '106',
   'date': 'Wed, 21 Oct 2020 21:11:17 GMT'},
  'RetryAttempts': 0}}

### 3. Create a Batch Transform Job for Inference

In [64]:
# Input side of the job: objects under the batch_input S3 prefix, declared as
# uncompressed text/csv and split on line boundaries.
transform_input = {
    "DataSource": {
        "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": batch_input},
    },
    "ContentType": "text/csv",
    "SplitType": "Line",
    "CompressionType": "None",
}

# Output side of the job and the compute it runs on.
transform_output = {"S3OutputPath": batch_output}
transform_resources = {"InstanceType": "ml.m5.xlarge", "InstanceCount": 1}

# Full create_transform_job request, tying the model to the input and output above.
request = {
    "TransformJobName": BATCH_JOB_NAME,
    "ModelName": MODEL_NAME,
    "BatchStrategy": "MultiRecord",
    "TransformOutput": transform_output,
    "TransformInput": transform_input,
    "TransformResources": transform_resources,
}

In [65]:
# Submit the batch transform job described by `request` and show the API response.
response = sagemaker_client.create_transform_job(**request)
response

{'TransformJobArn': 'arn:aws:sagemaker:us-east-1:892313895307:transform-job/fruits-clf-xgboost-batch-job-2020-10-21-21-11-16',
 'ResponseMetadata': {'RequestId': '6aa5fc57-d5f4-449a-bf9a-6ef39e9dae40',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '6aa5fc57-d5f4-449a-bf9a-6ef39e9dae40',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '125',
   'date': 'Wed, 21 Oct 2020 21:11:21 GMT'},
  'RetryAttempts': 0}}

In [66]:
# Poll the transform job every 30 seconds until it reaches a terminal state.
#   Completed -> report and leave the loop.
#   Failed    -> print the reported reason and raise.
#   Stopped   -> raise as well; without this branch a stopped job would never
#                become Completed or Failed and the loop would poll forever.
# Any other status (e.g. InProgress, Stopping) is treated as still running.
while True:
    response = sagemaker_client.describe_transform_job(TransformJobName=BATCH_JOB_NAME)
    status = response['TransformJobStatus']
    if status == 'Completed':
        print("Transform job ended with status: {}".format(status))
        break
    if status == 'Failed':
        # .get() so a missing FailureReason cannot mask the failure with a KeyError.
        message = response.get('FailureReason', 'no FailureReason in response')
        print('Transform failed with the following error: {}'.format(message))
        raise Exception('Transform job failed')
    if status == 'Stopped':
        print("Transform job ended with status: {}".format(status))
        raise Exception('Transform job was stopped before completing')
    print("Transform job is still in status: {}".format(status))
    time.sleep(30)

Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job ended with status: Completed


### 4. Evaluate Output

In [74]:
# S3 object key of the transform output to load (presumably the result for an
# input file named batch_test.csv -- verify against the contents of batch_input).
key = '{}/batch-inference/batch_test.csv.out'.format(PREFIX)

In [82]:
# Fetch the transform output from S3 and parse it as a single-column table.
obj = s3_client.get_object(Bucket=BUCKET, Key=key)
prediction_stream = obj['Body']
results_df = pd.read_csv(prediction_stream, names=['Predictions'])

In [83]:
# Display the predictions loaded from the batch transform output.
results_df

Unnamed: 0,Predictions
0,1.0
1,3.0
2,1.0
3,3.0
4,3.0
5,3.0
6,1.0
7,3.0
8,0.0
9,0.0
