#### Imports 

In [1]:
from sagemaker import get_execution_role
from time import gmtime, strftime
import pandas as pd
import sagemaker
import logging
import boto3
import time

##### Setup logging

In [2]:
logger = logging.getLogger('__name__')
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler())

#### Essentials

In [3]:
bucket = sagemaker.Session().default_bucket()
prefix = 'diabetic-readmission-prediction'
region = 'us-east-1'

In [4]:
batch_input = f's3://{bucket}/{prefix}/batch_input/'
batch_input

's3://sagemaker-us-east-1-119174016168/diabetic-readmission-prediction/batch_input/'

In [5]:
batch_output = f's3://{bucket}/{prefix}/batch_output/'
batch_output

's3://sagemaker-us-east-1-119174016168/diabetic-readmission-prediction/batch_output/'

In [6]:
current_timestamp = strftime('%Y-%m-%d-%H-%M-%S', gmtime())

In [7]:
automl_job_name = 'diabetic-readmission-prediction'  # Copy this from the console
model_name = f'autopilot-best-model-{current_timestamp}'
transform_job_name = f'autopilot-batch-job-{current_timestamp}'

In [8]:
session = boto3.Session()
sagemaker_execution_role = get_execution_role()
sagemaker_session = sagemaker.session.Session()
sagemaker_client = boto3.client('sagemaker', region_name=region)
s3_client = boto3.client('s3')

#### Copy batch input data from local to S3

In [9]:
!aws s3 cp .././data/diabetic_readmission_unlabeled.csv {batch_input}

upload: ../data/diabetic_readmission_unlabeled.csv to s3://sagemaker-us-east-1-119174016168/diabetic-readmission-prediction/batch_input/diabetic_readmission_unlabeled.csv


#### Get the best model using Autopilot job name

In [10]:
best_candidate = sagemaker_client.describe_auto_ml_job(AutoMLJobName=automl_job_name)["BestCandidate"]
best_candidate_name = best_candidate["CandidateName"]
logger.info(f"CandidateName: {best_candidate_name}")
logger.info(f'FinalAutoMLJobObjectiveMetricName: {best_candidate["FinalAutoMLJobObjectiveMetric"]["MetricName"]}')
logger.info(f'FinalAutoMLJobObjectiveMetricValue: {best_candidate["FinalAutoMLJobObjectiveMetric"]["Value"]}')

CandidateName: diabetic-readmission-predictioeS-187-52cde9d0
FinalAutoMLJobObjectiveMetricName: validation:macro_f_beta
FinalAutoMLJobObjectiveMetricValue: 0.4098981022834778


In [11]:
model = sagemaker_client.create_model(Containers=best_candidate["InferenceContainers"], 
                                      ModelName=model_name, 
                                      ExecutionRoleArn=sagemaker_execution_role)

logger.info(f'Model ARN corresponding to the best candidate is : {model["ModelArn"]}')

Model ARN corresponding to the best candidate is : arn:aws:sagemaker:us-east-1:119174016168:model/autopilot-best-model-2021-11-03-15-42-56


#### Create Batch Transform job 

In [12]:
transform_input = {
    "DataSource": {"S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": batch_input}},
    "ContentType": "text/csv",
    "CompressionType": "None",
    "SplitType": "Line",
}

transform_output = {
    "S3OutputPath": batch_output,
}

transform_resources = {"InstanceType": "ml.m5.4xlarge", "InstanceCount": 1}

sagemaker_client.create_transform_job(
    TransformJobName=transform_job_name,
    ModelName=model_name,
    TransformInput=transform_input,
    TransformOutput=transform_output,
    TransformResources=transform_resources,
)

{'TransformJobArn': 'arn:aws:sagemaker:us-east-1:119174016168:transform-job/autopilot-batch-job-2021-11-03-15-42-56',
 'ResponseMetadata': {'RequestId': 'a494f4e9-815f-4f93-bb4b-6a5fb4ceca89',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'a494f4e9-815f-4f93-bb4b-6a5fb4ceca89',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '116',
   'date': 'Wed, 03 Nov 2021 15:43:32 GMT'},
  'RetryAttempts': 0}}

#### Check the status of the running job

In [13]:
logger.info("[JobStatus]\n")

describe_response = sagemaker_client.describe_transform_job(TransformJobName=transform_job_name)
job_run_status = describe_response["TransformJobStatus"]
logger.info(job_run_status)

while job_run_status not in ("Failed", "Completed", "Stopped"):
    describe_response = sagemaker_client.describe_transform_job(TransformJobName=transform_job_name)
    job_run_status = describe_response["TransformJobStatus"]
    logger.info(job_run_status)
    time.sleep(30)

[JobStatus]

InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
Completed


#### Download the output of the batch transform job from S3 to local

In [15]:
s3_output_key = f"{prefix}/batch_output/diabetic_readmission_unlabeled.csv.out"
local_inference_results_path = ".././data/inference_results.csv"

s3 = boto3.resource("s3")


inference_results_bucket = s3.Bucket(sagemaker_session.default_bucket())

inference_results_bucket.download_file(s3_output_key, local_inference_results_path)

#### Inspect the results

In [16]:
data = pd.read_csv(local_inference_results_path, sep=";")
pd.set_option("display.max_rows", 10)  
data

Unnamed: 0,no
0,no
1,>30
2,no
3,>30
