# SageMaker Batch Transform with TorchServe

#### Imports

In [1]:
import base64
import json
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os
import boto3, time, json
import sagemaker

**Initiate session and retrieve region, account details**

In [2]:
# One boto3 session is shared by every client in this notebook so they all
# resolve the same credentials and region.
sess = boto3.Session()
region = sess.region_name
# Index the response directly: a missing account id should fail here rather than
# silently producing an image URI that starts with "None.dkr.ecr...".
account = sess.client("sts").get_caller_identity()["Account"]

In [3]:
# Execution role of this notebook; it is passed to create_model as ExecutionRoleArn.
role = sagemaker.get_execution_role()
# Low-level SageMaker client used for create_model and the transform job calls.
sm = sess.client("sagemaker")

#### Prepare model

In [None]:
# Artifacts are stored in the SageMaker session's default bucket, under `prefix`.
sagemaker_session = sagemaker.Session()
bucket_name = sagemaker_session.default_bucket()
prefix = "Dyna"
# Base name (without extension) of the model archive.
model_file_name = "flores_small"

In [None]:
# Fetch the TorchServe model archive, package it as a .tar.gz, and upload it to S3.
# -N only re-downloads when the remote file changed, so re-running the cell does not
# leave "<name>.mar.1" copies behind while tar keeps packaging the stale original.
!wget -N https://torchserve.pytorch.org/mar_files/{model_file_name}.mar
!tar cvfz {model_file_name}.tar.gz {model_file_name}.mar
!aws s3 cp {model_file_name}.tar.gz s3://{bucket_name}/{prefix}/models/

In [None]:
# S3 URI of the packaged model. It must match the destination of the `aws s3 cp`
# upload above. The f-prefix is required: without it the literal text
# "{bucket_name}/{prefix}" would be sent to SageMaker as the ModelDataUrl.
model_artifact = f"s3://{bucket_name}/{prefix}/models/{model_file_name}.tar.gz"

In [None]:
# Name the SageMaker model is created under; the transform job request refers
# to the model by this same name.
model_name = "floressmall-torchserve-sagemaker"

## Build a custom container

In [None]:
%%sh

# Build the serving image from ./docker and push it to this account's ECR registry.
container_name=flores-torchserve-sagemaker
account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current configuration (default to us-west-2 if none defined)
region=$(aws configure get region)
region=${region:-us-west-2}

registry="${account}.dkr.ecr.${region}.amazonaws.com"
fullname="${registry}/${container_name}"

# If the repository doesn't exist in ECR, create it.
# --region is passed explicitly so the us-west-2 fallback above is honoured here too.
if ! aws ecr describe-repositories --region "${region}" --repository-names "${container_name}" > /dev/null 2>&1
then
    aws ecr create-repository --region "${region}" --repository-name "${container_name}" > /dev/null
fi

# Log Docker in to ECR. `aws ecr get-login` was removed in AWS CLI v2;
# `get-login-password` is available in v2 and in v1 from 1.17.10 onwards.
aws ecr get-login-password --region "${region}" \
    | docker login --username AWS --password-stdin "${registry}" || exit 1

# Build the docker image locally with the image name and then push it to ECR
# with the full name. Stop on the first failure so a stale image is never pushed.
docker build -t "${container_name}" docker/ || exit 1
docker tag "${container_name}" "${fullname}" || exit 1

docker push "${fullname}"

#### Create SageMaker model, deploy and run batch transform

In [9]:
# URI of the image in this account's ECR registry, pinned to the `latest` tag.
registry_name = "flores-torchserve-sagemaker"
image = f"{account}.dkr.ecr.{region}.amazonaws.com/{registry_name}:latest"

# The primary container pairs the serving image with the model archive in S3.
container = dict(Image=image, ModelDataUrl=model_artifact)

# Register the model with SageMaker and show the ARN it was assigned.
create_model_response = sm.create_model(
    ModelName=model_name,
    ExecutionRoleArn=role,
    PrimaryContainer=container,
)

print(create_model_response["ModelArn"])

arn:aws:sagemaker:us-west-2:850464037171:model/floressmall-torchserve-sagemaker


### Batch transform jobs

* The S3 bucket is `bucket_name`, the default bucket retrieved at the start of the notebook.
* Make sure the bucket contains the `batch_input` and `batch_output` folders (prefixes) shown below.
* Make sure the dataset files (the shared input files) are placed in the `batch_input` folder.

In [10]:
# Keep the batch input/output under the same `prefix` used for the model upload
# instead of repeating the literal "Dyna", so changing `prefix` moves everything together.
batch_input = f"s3://{bucket_name}/{prefix}/batch_transform_flores_torchserve_sagemaker/"

batch_output = f"s3://{bucket_name}/{prefix}/batch_transform_flores_torchserve_sagemaker_output/"

In [17]:
# Recursively copy everything under the local flores_inputs/ directory to the batch input S3 prefix.
!aws s3 cp --recursive flores_inputs/ {batch_input}

upload: flores_inputs/flores101-small6-devtest.json to s3://sagemaker-us-west-2-850464037171/Dyna/batch_transform_flores_torchserve_sagemaker/flores101-small6-devtest.json
upload: flores_inputs/flores101-small5-devtest.json to s3://sagemaker-us-west-2-850464037171/Dyna/batch_transform_flores_torchserve_sagemaker/flores101-small5-devtest.json
upload: flores_inputs/flores101-small4-devtest.json to s3://sagemaker-us-west-2-850464037171/Dyna/batch_transform_flores_torchserve_sagemaker/flores101-small4-devtest.json
upload: flores_inputs/flores101-small3-devtest.json to s3://sagemaker-us-west-2-850464037171/Dyna/batch_transform_flores_torchserve_sagemaker/flores101-small3-devtest.json
upload: flores_inputs/flores101-small2-devtest.json to s3://sagemaker-us-west-2-850464037171/Dyna/batch_transform_flores_torchserve_sagemaker/flores101-small2-devtest.json


In [12]:
# Suffix the job name with the current UTC time (second resolution) so that
# re-running this cell later yields a different transform job name.
# `time` is already imported in the imports cell at the top of the notebook.
timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
batch_job_name = "flores-batch" + timestamp
batch_job_name

'flores-batch2021-11-02-23-16-23'

In [13]:
# Keyword arguments for create_transform_job; the dict is unpacked with ** when the job is created.
request = dict(
    # Per-invocation timeout (seconds) and retry count.
    ModelClientConfig=dict(
        InvocationsTimeoutInSeconds=3600,
        InvocationsMaxRetries=1,
    ),
    TransformJobName=batch_job_name,
    ModelName=model_name,
    MaxConcurrentTransforms=1,
    BatchStrategy="MultiRecord",
    # Results are written under batch_output, assembled line by line.
    TransformOutput=dict(
        S3OutputPath=batch_output,
        AssembleWith="Line",
        Accept="application/json",
    ),
    # Every object under the batch_input prefix is read and split on line boundaries.
    TransformInput=dict(
        DataSource=dict(S3DataSource=dict(S3DataType="S3Prefix", S3Uri=batch_input)),
        SplitType="Line",
        ContentType="application/json",
    ),
    TransformResources=dict(InstanceType="ml.g4dn.xlarge", InstanceCount=1),
)

In [14]:
%%time
sm.create_transform_job(**request)

while True:
    response = sm.describe_transform_job(TransformJobName=batch_job_name)
    status = response["TransformJobStatus"]
    if status == "Completed":
        print("Transform job ended with status: " + status)
        break
    if status == "Failed":
        message = response["FailureReason"]
        print("Transform failed with the following error: {}".format(message))
        raise Exception("Transform job failed")
    print("Transform job is still in status: " + status)
    time.sleep(30)



Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job ended with status: Completed
CPU times: user 182 ms, sys: 8.07 ms, total: 190 ms
Wall time: 7min 1s
