## Install and import libraries

In [None]:
# update SageMaker libraries
!pip3 install -U sagemaker --disable-pip-version-check

In [None]:
import boto3
import io
import os
import time
import pandas as pd
import numpy as np
import sagemaker
import uuid
from sagemaker import image_uris
from sagemaker.inputs import TrainingInput
from sagemaker.estimator import Estimator
from sagemaker import get_execution_role

# Replace the placeholder with your own bucket name - the bucket from other chapters can be reused
bucket = "your-S3-bucket-name"
# Key prefix under which this chapter's files are stored in the bucket
prefix = "aiml-book/chapter9"
# S3 client used by the upload/download cells in this notebook
s3 = boto3.client(service_name="s3")

## Pre-process data

In [None]:
# Read the CSV (its first row is the header) and keep only the columns
# that contain no null values
raw_df = pd.read_csv('kaggle-house-prices-dataset.csv', header=0).dropna(axis=1)
raw_df.head()

The SageMaker built-in XGBoost algorithm treats the **first column of the CSV training data as the label (target) feature**. In our example, we are training a model to predict the **SalePrice** of a house, so `SalePrice` is placed first.

In [None]:
# Keep a handful of numeric and categorical columns so the example stays low dimensional.
# SalePrice (the label) is listed first.
small_df = raw_df.loc[:, [
    'SalePrice',
    'LotArea',
    'Street',
    'LotShape',
    'LandContour',
    'LotConfig',
    'YrSold',
    'SaleType',
    'SaleCondition',
]]
small_df.head()

In [None]:
# Perform numerical (one-hot) encoding of the categorical features.
# dtype=int forces the indicator columns to 0/1: recent pandas versions default to
# boolean indicator columns, which would be written to the CSV files as True/False
# instead of numbers.
encoded_df = pd.get_dummies(small_df, dtype=int)
encoded_df.head()
# you can see from the results below that one hot encoding was applied for categorical variables

In [None]:
# The first 90% of the rows become the training set, the remaining 10% the test set
train_index = int(len(encoded_df) * 0.9)
train_df = encoded_df.iloc[:train_index]
test_df = encoded_df.iloc[train_index:]
# The test set is used for inference, so the label column is dropped from it
test_df_no_label = test_df.drop(columns=['SalePrice'])
print(f"Train dataset dimensions: {train_df.shape}")
print(f"Test dataset dimensions: {test_df_no_label.shape}")

In [None]:
# Write both splits as CSV without header row or index column ...
train_df.to_csv('train.csv', header=False, index=False)
test_df_no_label.to_csv('test.csv', header=False, index=False)
# ... then copy each file to its own folder under the chapter prefix in S3
s3.upload_file('train.csv', bucket, f'{prefix}/train/train.csv')
s3.upload_file('test.csv', bucket, f'{prefix}/test/test.csv')

In [None]:
# Point SageMaker training at the S3 folder holding the CSV training data
train_input = TrainingInput(s3_data=f's3://{bucket}/{prefix}/train/', content_type='csv')
print(train_input)

## Start Model Training

Please refer to SageMaker documentation for descriptions of hyperparameters - https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost_hyperparameters.html

In [None]:
# Hyperparameters for the XGBoost training job (all values are passed as strings)
xgboost_hps = dict(
    max_depth="6",
    eta="0.3",
    gamma="2",
    min_child_weight="4",
    subsample="0.5",
    objective="reg:squarederror",
    num_round="30",
)

In [None]:
# Next let us get the XGBoost built-in image.
# The image URI is region specific, so resolve the region from the default boto3
# session (the same one the boto3 clients in this notebook use) instead of
# hard-coding it. Fall back to 'us-east-1' when no region is configured.
aws_region = boto3.Session().region_name or 'us-east-1'
xgb_image = sagemaker.image_uris.retrieve("xgboost", aws_region, "1.5-1")

In [None]:
# S3 location where the training job stores the model artifacts
model_prefix = f'{prefix}/xgboost-model'
model_output = f's3://{bucket}/{model_prefix}/output'

In [None]:
# Assemble the estimator: container image, hyperparameters, IAM role,
# training compute and the S3 location for the model artifacts
xgb_estimator = Estimator(
    role=sagemaker.get_execution_role(),
    image_uri=xgb_image,
    hyperparameters=xgboost_hps,
    instance_type='ml.m5.2xlarge',
    instance_count=1,
    volume_size=10,  # 10 GB
    output_path=model_output,
)

In [None]:
# Run the training job, feeding the training input through the 'train' channel
xgb_estimator.fit(inputs={'train': train_input})

While we can directly use the Estimator object to create a model and deploy it to an endpoint, for the purposes of this example we will use the boto3 APIs to show how to create a model, create an endpoint configuration, and finally create an endpoint and deploy the model. This decoupling shows how you can simply bring a trained model and host it directly on SageMaker.

## Inference Options

### Real-time endpoint single-container single-model

In [None]:
# Model artifacts are stored here: print the estimator's model_data value,
# which the create_model calls below pass as ModelDataUrl
print(xgb_estimator.model_data)

#### Create Model

In [None]:
# SageMaker control-plane client (boto3)
sm = boto3.client(service_name='sagemaker')
# Name under which the model is registered
model_name = 'chapter9-xgboost-model-one-container1'
# Register a model that pairs the XGBoost image with the trained model artifacts
model_res = sm.create_model(
    ModelName=model_name,
    PrimaryContainer={
        'Image': xgb_image,
        'ModelDataUrl': xgb_estimator.model_data,
    },
    ExecutionRoleArn=sagemaker.get_execution_role(),
)


In [None]:
# Show the ARN returned by the create_model call
print(model_res['ModelArn'])

#### Create Endpoint Configuration

In [None]:
# The endpoint configuration is named after the model
ep_config_name = f'{model_name}-epconfig'
epcfg_response = sm.create_endpoint_config(
    EndpointConfigName=ep_config_name,
    ProductionVariants=[
        dict(
            VariantName="chapter9-test-variant",  # name of the production variant
            ModelName=model_name,
            InstanceType='ml.m5.xlarge',  # compute instance type for hosting
            InitialInstanceCount=1,  # number of instances to launch initially
        )
    ],
)
print(f"Endpoint Configuration successfully created: {epcfg_response['EndpointConfigArn']}")

#### Create Endpoint

In [None]:
# Create the endpoint from the endpoint configuration; its name is derived from the model name
ep_name = f'{model_name}-ep'
ep_response = sm.create_endpoint(
    EndpointConfigName=ep_config_name,
    EndpointName=ep_name,
)

In [None]:
# Block until the endpoint reaches InService instead of re-running this cell by hand,
# so the invoke cell that follows also works under "Run All".
# The waiter raises botocore.exceptions.WaiterError if the endpoint fails to come up
# or the wait times out.
sm.get_waiter('endpoint_in_service').wait(EndpointName=ep_name)
print(sm.describe_endpoint(EndpointName=ep_name)['EndpointStatus'])

#### Invoke Endpoint

In [None]:
# Serialize the unlabeled test rows as CSV (no header, no index) into an
# in-memory text buffer that serves as the inference request body
from io import StringIO
inf_req = StringIO()
test_df_no_label.to_csv(path_or_buf=inf_req, index=False, header=False)
# to inspect the buffer contents, uncomment and execute the line below
#print(inf_req.getvalue())

In [None]:
# Invocations go through the SageMaker runtime client
sm_run = boto3.client(service_name="sagemaker-runtime")
# Send the CSV payload from the StringIO buffer created in the previous cell
predictions = sm_run.invoke_endpoint(
    EndpointName=ep_name,
    ContentType='text/csv',
    Body=inf_req.getvalue(),
)
# check that we get a proper response - the predicted sale prices
print(predictions['Body'].read().decode('utf-8'))

### Real-time endpoint Serverless

#### Create Model

In [None]:
# SageMaker control-plane client (boto3)
sm = boto3.client(service_name='sagemaker')
# Name under which the serverless model is registered
model_name_serverless = 'chapter9-xgboost-model-serverless'
# Register a model from the trained artifacts. For serverless we use the
# Containers list rather than PrimaryContainer.
model_res_serverless = sm.create_model(
    ModelName=model_name_serverless,
    Containers=[
        {
            'Image': xgb_image,
            'Mode': 'SingleModel',
            'ModelDataUrl': xgb_estimator.model_data,
        }
    ],
    ExecutionRoleArn=sagemaker.get_execution_role(),
)
print(model_res_serverless['ModelArn'])

#### Create Serverless Endpoint Configuration

In [None]:
# The serverless endpoint configuration is named after the serverless model
ep_config_name_serverless = f'{model_name_serverless}-epconfig'
epcfg_response_serverless = sm.create_endpoint_config(
    EndpointConfigName=ep_config_name_serverless,
    ProductionVariants=[
        dict(
            VariantName='chapter9-serverless',
            ModelName=model_name_serverless,
            # a ServerlessConfig takes the place of instance type / instance count
            ServerlessConfig=dict(MemorySizeInMB=3072, MaxConcurrency=25),
        )
    ],
)
print(f"Serverless Endpoint Configuration successfully created: {epcfg_response_serverless['EndpointConfigArn']}")

#### Create Serverless Endpoint

In [None]:
# Endpoint name is derived from the serverless model name
ep_name_serverless = f'{model_name_serverless}-ep'
ep_response_serverless = sm.create_endpoint(
    EndpointConfigName=ep_config_name_serverless,
    EndpointName=ep_name_serverless,
)

In [None]:
# Block until the serverless endpoint reaches InService (should take 3 to 5 mins)
# instead of re-running this cell by hand, so the invoke cell that follows also
# works under "Run All". The waiter raises botocore.exceptions.WaiterError if the
# endpoint fails to come up or the wait times out.
sm.get_waiter('endpoint_in_service').wait(EndpointName=ep_name_serverless)
print(sm.describe_endpoint(EndpointName=ep_name_serverless)['EndpointStatus'])

#### Invoke Serverless Endpoint

In [None]:
# Serialize the unlabeled test rows as CSV (no header, no index) into an
# in-memory text buffer used as the serverless inference request body
from io import StringIO
inf_req_svl = StringIO()
test_df_no_label.to_csv(path_or_buf=inf_req_svl, index=False, header=False)
# to inspect the buffer contents, uncomment and execute the line below
#print(inf_req_svl.getvalue())

In [None]:
# we need a runtime handler for SageMaker
sm_run = boto3.client("sagemaker-runtime")
# Now call the *serverless* endpoint. This must target ep_name_serverless:
# passing ep_name would send the request to the instance-backed real-time
# endpoint created earlier and never exercise the serverless one.
serverless_predictions = sm_run.invoke_endpoint(
                            EndpointName=ep_name_serverless,
                            Body=inf_req_svl.getvalue(), # the values from the StringIO buffer we created in the previous cell
                            ContentType='text/csv'
                            )
# check that we get a proper response - the predicted sale prices
print(serverless_predictions['Body'].read().decode('utf-8'))

### Inference with SageMaker Batch Transform

In [None]:
# S3 URI of the uploaded test dataset, and the S3 prefix that receives the batch results
s3_test = f's3://{bucket}/{prefix}/test/test.csv'
s3_batch_out = f's3://{bucket}/{prefix}/batch/output'

In [None]:
# Batch Transform input: where the data lives in S3, its content type,
# and how it is split (by line)
transform_input = dict(
    DataSource=dict(
        S3DataSource=dict(S3DataType='S3Prefix', S3Uri=s3_test),
    ),
    ContentType='text/csv',
    SplitType='Line',
)

In [None]:
# Batch Transform output: the S3 destination and how results are assembled (by line)
transform_output = dict(S3OutputPath=s3_batch_out, AssembleWith='Line')

In [None]:
# Compute used by the batch transform job: a single ml.m5.2xlarge instance
transform_resources = dict(InstanceType='ml.m5.2xlarge', InstanceCount=1)

In [None]:
# Start the batch transform job against the model created earlier (model_name),
# using the input, output and compute settings defined in the cells above
batch_job_name = 'chapter9-batch-inference'
batch_res = sm.create_transform_job(
    TransformJobName=batch_job_name,
    ModelName=model_name,
    TransformInput=transform_input,
    TransformOutput=transform_output,
    TransformResources=transform_resources,
    BatchStrategy='MultiRecord',
    MaxPayloadInMB=1,
)

In [None]:
# Wait for the batch transform job to finish before inspecting it: the next cell
# downloads the job output from S3, which does not exist until the job is done.
# The waiter raises botocore.exceptions.WaiterError if the job fails or the wait times out.
sm.get_waiter('transform_job_completed_or_stopped').wait(TransformJobName=batch_job_name)
# Summarize the finished job
job_details = sm.describe_transform_job(TransformJobName=batch_job_name)
print("Job Name is: "+job_details['TransformJobName'])
print("Job Status is: "+ job_details['TransformJobStatus'])
print("Model Name is: "+ job_details['ModelName'])
print("Job results will be available in: "+ job_details['TransformOutput']['S3OutputPath'])

In [None]:
# Download the batch transform output file from S3 and preview it as a header-less CSV
out_file = 'test.csv.out'
s3.download_file(bucket, f'{prefix}/batch/output/{out_file}', out_file)
out_df = pd.read_csv(out_file, header=None)
out_df.head()

## SageMaker Elastic Inference

Now that we have seen how to build and use endpoints for hosting, adding an Elastic Inference accelerator is really easy. All you have to do is compile your model using SageMaker Neo and specify an accelerator of type Elastic Inference when you create your endpoint configuration. We will see how to do this in this section.

### Import a pre-trained TensorFlow model

In [None]:
import tarfile
import tensorflow as tf
from sagemaker.tensorflow import TensorFlowModel
from tensorflow import keras
# Load ResNet50 with ImageNet weights, including the classification head
resnet_model = keras.applications.resnet50.ResNet50(include_top=True, weights='imagenet')
# Export it in SavedModel format into the directory '1' ...
m_dir = '1'
tf.saved_model.save(resnet_model, m_dir)
# ... and pack that directory into the gzip-compressed model.tar.gz used for the SageMaker model
with tarfile.open('model.tar.gz', mode='w:gz') as archive:
    archive.add(m_dir)


### Load the model to S3

In [None]:
# Upload the packaged model below the chapter prefix and keep its full S3 URI
tf_path = 'tensorflow/model/model.tar.gz'
tf_s3_path = f's3://{bucket}/{prefix}/{tf_path}'
s3.upload_file('model.tar.gz', bucket, f'{prefix}/{tf_path}')
# Create a TensorFlowModel object that references the uploaded artifact
tf_model = TensorFlowModel(
    role=sagemaker.get_execution_role(),
    model_data=tf_s3_path,
    framework_version='2.3',
)

In [None]:
# Show the S3 URI that was passed to TensorFlowModel as model_data
print(tf_s3_path)

In [None]:
# Deploy to a SageMaker endpoint; passing accelerator_type to deploy() attaches
# an Elastic Inference accelerator to the hosting instance
tf_endpoint = tf_model.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.xlarge',
    accelerator_type="ml.eia2.medium",
)

### Run prediction of a cat image

In [None]:
import PIL
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
import matplotlib.pyplot as plt
# Load the picture resized to 224x224 and display it
cat_pic = load_img('phoebe.PNG', target_size=(224, 224))
plt.gca().imshow(cat_pic)
plt.show()

In [None]:
# Convert the image to an array and add a leading axis so it forms a batch of one
np_arr = img_to_array(cat_pic)
arr_bat = np_arr[np.newaxis, ...]
arr_bat.shape

In [None]:
# Apply the ResNet50 preprocessing to a copy of the batch
b4_pred_img = keras.applications.resnet50.preprocess_input(arr_bat.copy())
# Send the batch to the endpoint as a nested list under the "inputs" key
results = tf_endpoint.predict(dict(inputs=b4_pred_img.tolist()))
# Convert the returned "outputs" back into a numpy array
new_res = np.asarray(results['outputs'])
# Decode the scores into class predictions for the picture
print(keras.applications.imagenet_utils.decode_predictions(new_res))