# Create the assets required to build/test a docker image

## Start by creating the training script

In [None]:
%%writefile train.py
import json
import os
import re
import sys
import traceback

import joblib
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor

def load_dataset(path):
    """Read every CSV file in a directory into one feature/label pair.

    Args:
        path: Directory whose entries are header-less, comma-separated
            files with the label in the first column.

    Returns:
        A tuple ``(X, y)``: ``y`` is the first column of the concatenated
        data and ``X`` holds all remaining columns.

    Raises:
        ValueError: If ``path`` contains no entries.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the concatenated frame reproducible between runs.
    files = [os.path.join(path, name) for name in sorted(os.listdir(path))]

    if not files:
        raise ValueError("Invalid # of files in dir: {}".format(path))

    raw_data = [pd.read_csv(file, sep=",", header=None) for file in files]
    data = pd.concat(raw_data)

    # labels are in the first column
    y = data.iloc[:, 0]
    X = data.iloc[:, 1:]
    return X, y
    
def start(args):
    """Train the regressor, print its validation score and persist the model.

    Args:
        args: Parsed arguments providing ``train`` and ``validation`` (data
            directories), ``max_depth`` and ``n_estimators``
            (hyperparameters) and ``model_dir`` (directory that receives
            ``boston_housing_model.pkl``).

    On any failure the error and traceback are printed to stderr and written
    to a ``failure`` file in the output directory, then the process exits
    with status 255.
    """
    print("Training mode")

    try:
        X_train, y_train = load_dataset(args.train)
        X_test, y_test = load_dataset(args.validation)

        hyperparameters = {
            "max_depth": args.max_depth,
            "verbose": 1, # show all logs
            "n_estimators": args.n_estimators
        }
        print("Training the classifier")
        model = GradientBoostingRegressor()
        model.set_params(**hyperparameters)
        model.fit(X_train, y_train)
        print("Score: {}".format( model.score(X_test, y_test)) )

        # Use a context manager so the file is flushed and closed even if
        # serialization fails part-way.
        model_path = os.path.join(args.model_dir, "boston_housing_model.pkl")
        with open(model_path, "wb") as model_file:
            joblib.dump(model, model_file)

    except Exception as e:
        trc = traceback.format_exc()
        message = "Exception during training: " + str(e) + "\n" + trc

        # Printing this causes the exception to be in the training job logs, as well.
        print(message, file=sys.stderr)

        # Write out an error file. This will be returned as the failureReason in the
        # DescribeTrainingJob result. The directory may not exist when the
        # container is run by hand, so create it first; a failure to write
        # the file must not mask the original error or the exit code.
        output_path = os.environ.get("SM_OUTPUT_DIR", "/opt/ml/output")
        try:
            os.makedirs(output_path, exist_ok=True)
            with open(os.path.join(output_path, "failure"), "w") as s:
                s.write(message)
        except OSError as write_error:
            print("Could not write failure file: {}".format(write_error), file=sys.stderr)

        # A non-zero exit code causes the training job to be marked as Failed.
        sys.exit(255)

## Now create the handler. The Inference Handler is how we use the SageMaker Inference Toolkit to encapsulate our code and expose it as a SageMaker container.

SageMaker Inference Toolkit: https://github.com/aws/sagemaker-inference-toolkit

In [None]:
%%writefile handler.py
import os
import sys
import joblib
from sagemaker_inference.default_inference_handler import DefaultInferenceHandler
from sagemaker_inference.default_handler_service import DefaultHandlerService
from sagemaker_inference import content_types, errors, transformer, encoder, decoder

class HandlerService(DefaultHandlerService, DefaultInferenceHandler):
    """Handler service that serves the pickled model over ``text/csv``.

    The instance registers itself as the inference handler of the
    ``Transformer`` it passes to ``DefaultHandlerService``, so the
    ``default_*_fn`` methods below implement model loading, input decoding,
    prediction and output encoding.
    """

    def __init__(self):
        op = transformer.Transformer(default_inference_handler=self)
        super(HandlerService, self).__init__(transformer=op)

    ## Loads the model from the disk
    def default_model_fn(self, model_dir):
        """Load ``boston_housing_model.pkl`` from ``model_dir`` and return it."""
        model_filename = os.path.join(model_dir, "boston_housing_model.pkl")
        # Close the file as soon as the model is loaded instead of leaving
        # the handle open for the lifetime of the server process.
        with open(model_filename, "rb") as model_file:
            return joblib.load(model_file)

    ## Parse and check the format of the input data
    def default_input_fn(self, input_data, content_type):
        """Decode a ``text/csv`` request body into a single-row array.

        Raises:
            Exception: If ``content_type`` is not ``text/csv``.
        """
        if content_type != "text/csv":
            raise Exception("Invalid content-type: %s" % content_type)
        # reshape(1, -1) turns the decoded values into exactly one row, so
        # each request is treated as one sample.
        return decoder.decode(input_data, content_type).reshape(1,-1)

    ## Run our model and do the prediction
    def default_predict_fn(self, payload, model):
        """Return ``model.predict(payload)`` converted to a plain list."""
        return model.predict( payload ).tolist()

    ## Gets the prediction output and format it to be returned to the user
    def default_output_fn(self, prediction, accept):
        """Encode the prediction as ``text/csv``.

        Raises:
            Exception: If ``accept`` is not ``text/csv``.
        """
        if accept != "text/csv":
            raise Exception("Invalid accept: %s" % accept)
        return encoder.encode(prediction, accept)

## Now create the entrypoint of the container. The main function.

We'll use SageMaker Training Toolkit (https://github.com/aws/sagemaker-training-toolkit) to work with the arguments and environment variables defined by SageMaker. This library will make our code simpler.

In [None]:
%%writefile main.py
import train
import argparse
import sys
import os
import traceback
from sagemaker_inference import model_server
from sagemaker_training import environment

if __name__ == "__main__":
    # The first argument selects the mode: "train" runs a training job,
    # "serve" starts the model server.
    if len(sys.argv) < 2 or sys.argv[1] not in ("serve", "train"):
        raise Exception("Invalid argument: you must inform 'train' for training mode or 'serve' predicting mode")

    if sys.argv[1] == "train":

        env = environment.Environment()

        # The container is started with only `train` on the command line, so
        # hyperparameters never arrive as CLI flags. Seed the argparse
        # defaults from the hyperparameters the environment exposes, falling
        # back to the built-in values when a key is absent. An explicit
        # command-line flag still takes precedence.
        hyperparameters = env.hyperparameters

        parser = argparse.ArgumentParser()
        # https://github.com/aws/sagemaker-training-toolkit/blob/master/ENVIRONMENT_VARIABLES.md
        parser.add_argument("--max-depth", type=int, default=hyperparameters.get("max_depth", 10))
        parser.add_argument("--n-estimators", type=int, default=hyperparameters.get("n_estimators", 120))

        # reads input channels training and testing from the environment variables
        parser.add_argument("--train", type=str, default=env.channel_input_dirs["train"])
        parser.add_argument("--validation", type=str, default=env.channel_input_dirs["validation"])

        parser.add_argument("--model-dir", type=str, default=env.model_dir)

        # parse_known_args tolerates the leading "train" positional and any
        # other arguments this parser does not declare.
        args, unknown = parser.parse_known_args()
        train.start(args)
    else:
        model_server.start_model_server(handler_service="serving.handler")

## Now we can create the Dockerfile

Just pay attention to the packages we'll install in our container. Here, we'll use SageMaker Inference Toolkit (https://github.com/aws/sagemaker-inference-toolkit) and SageMaker Training Toolkit (https://github.com/aws/sagemaker-training-toolkit) to prepare the container for training/serving our model. By serving you can understand: exposing our model as a webservice that can be called through an api call.

In [None]:
%%writefile Dockerfile
FROM python:3.7-buster

# Set a docker label to advertise multi-model support on the container
LABEL com.amazonaws.sagemaker.capabilities.multi-models=false
# Set a docker label to enable container to use SAGEMAKER_BIND_TO_PORT environment variable if present
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true

# Install the JDK (presumably required by multi-model-server; confirm) and
# remove the apt package lists in the SAME layer, otherwise the cleanup does
# not reduce the image size.
RUN apt-get update -y \
    && apt-get -y install --no-install-recommends default-jdk \
    && rm -rf /var/lib/apt/lists/*

RUN pip --no-cache-dir install multi-model-server sagemaker-inference sagemaker-training
RUN pip --no-cache-dir install pandas numpy scipy scikit-learn

ENV PYTHONUNBUFFERED=TRUE
ENV PYTHONDONTWRITEBYTECODE=TRUE
# Only the code directory belongs on the module search path; the executable
# search path (PATH) has no business in PYTHONPATH.
ENV PYTHONPATH="/opt/ml/code"

COPY main.py /opt/ml/code/main.py
COPY train.py /opt/ml/code/train.py
# Placed under serving/ so main.py can reference it as "serving.handler".
COPY handler.py /opt/ml/code/serving/handler.py

# SageMaker-style invocation: the mode ("train" or "serve") is appended as the argument.
ENTRYPOINT ["python", "/opt/ml/code/main.py"]

# Local Test: Let's build the image locally and do some testing

Each SageMaker Jupyter Notebook already has a Docker environment pre-installed, so we can work with Docker containers from this same environment.

In [None]:
#!rm -rf lost+found/
!docker build -f Dockerfile -t boston_housing_model:1.0 .

## We need to prepare the dataset.

Note this step may have been taken care of in the preprocessing portion of this workshop, but you need to make sure a sanitized and properly split training and test set exist in the `/input/data` directory so when you run your training job, the folder is properly mounted within the container at `/opt/ml/input`

In [None]:
!rm -rf input
!mkdir -p input/data/train
!mkdir -p input/data/validation

import pandas as pd
import numpy as np

from sklearn import datasets
from sklearn.model_selection import train_test_split

# Build the train/validation CSV files consumed by the container.
# NOTE(review): datasets.load_boston was deprecated in scikit-learn 1.0 and
# removed in 1.2 — confirm the notebook kernel's scikit-learn still provides it.
boston = datasets.load_boston()

# Prepend the target as column 0 so the label comes first, followed by the features.
dataset = np.insert(boston.data, 0, boston.target,axis=1)

df = pd.DataFrame(data=dataset)

# Split label (column 0) from features, then hold out 33% of the rows with a
# fixed seed so the split is repeatable.
X = df.iloc[:,1:]
y = df.iloc[:,0]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Re-attach the label as the first column and write without header or index.
train_df = X_train.copy()
train_df.insert(0, 'medv', y_train)
train_df.to_csv("input/data/train/training.csv", sep=",", header=None, index=None)

# Same layout for the held-out rows.
test_df = X_test.copy()
test_df.insert(0, 'medv', y_test)
test_df.to_csv("input/data/validation/testing.csv", sep=",", header=None, index=None)

# Show the first rows of the full (unsplit) frame as the cell output.
df.head()

## Now that we have the algorithm image we can run it to train/deploy a model

Just a basic test, using the local Docker daemon. We will simulate SageMaker calling our docker container for training and serving. We'll do that using the built-in Docker Daemon of the Jupyter Notebook Instance.

In [None]:
!rm -rf input/config && mkdir -p input/config

In [None]:
%%writefile input/config/hyperparameters.json
{"max_depth": 20, "n_estimators": 120}

In [None]:
%%writefile input/config/resourceconfig.json
{"current_host": "localhost", "hosts": ["algo-1-kipw9"]}

In [None]:
%%writefile input/config/inputdataconfig.json
{"train": {"TrainingInputMode": "File"}, "validation": {"TrainingInputMode": "File"}}

In [None]:
%%time
# Start from an empty ./model directory so only this run's artifact is present.
!rm -rf model/
!mkdir -p model

print( "Training...")
# Run the image in "train" mode with ./model and ./input mounted at
# /opt/ml/model and /opt/ml/input inside the container; --rm removes the
# container when it exits.
!docker run --rm --name "my_model" \
    -v "$PWD/model:/opt/ml/model" \
    -v "$PWD/input:/opt/ml/input" boston_housing_model:1.0 train

## This is a serving test. It simulates an Endpoint exposed by Sagemaker.

After you execute the next cell, this Jupyter notebook will freeze. A webservice will be exposed at the port 8080.

Switch over to the notebook titled **BYOC-example-local-container-test** and walk through the steps to test your newly created custom container. When you are finished with that notebook, return to this notebook, hit the "**stop**" button above, and continue with the next cell.

In [None]:
# Run the image in "serve" mode, publishing container port 8080 on the host
# and mounting the ./model directory produced by the training cell at /opt/ml/model.
# This command blocks until the container is stopped.
!docker run --rm --name "my_model" \
    -p 8080:8080 \
    -v "$PWD/model:/opt/ml/model" \
    -v "$PWD/input:/opt/ml/input" boston_housing_model:1.0 serve

## It's time to push your custom model up to Elastic Container Registry (ECR) so it can be used by other ML workflows whenever you want.

In [None]:
import boto3

# Look up the caller's account and the session's region; both are part of
# the registry host name of the image.
sts_client, session = boto3.client("sts"), boto3.session.Session()

account_id = sts_client.get_caller_identity()["Account"]
region = session.region_name
credentials = session.get_credentials().get_frozen_credentials()

# Image reference layout: <account>.dkr.ecr.<region>.amazonaws.com/<repository>:<tag>
repo_name = "boston-housing-model"
image_tag = "latest"
uri_template = "{}.dkr.ecr.{}.amazonaws.com/{}:{}"
fullname = uri_template.format(account_id, region, repo_name, image_tag)
print(fullname)

We will now create a repository in Elastic Container Registry (if one does not already exist with the given repo_name), build and push our docker container. This is done with a bash script for ease of use. 

In [None]:
%%bash

# Stop at the first failing command (including inside pipelines) so a broken
# build or a failed login is never followed by a tag/push.
set -eo pipefail

# The name of our algorithm
algorithm_name=boston-housing-model

#cd container

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current AWS CLI configuration.
# `aws configure get` exits non-zero when the value is unset; tolerate that
# here so the problem can be reported with a clear message below.
region=$(aws configure get region || true)
#region=${region:-us-west-2}
if [ -z "${region}" ]
then
    echo "No AWS region configured; set one with 'aws configure set region <region>'." >&2
    exit 1
fi

registry="${account}.dkr.ecr.${region}.amazonaws.com"
fullname="${registry}/${algorithm_name}:latest"

# If the repository doesn't exist in ECR, create it.
if ! aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1
then
    aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null
fi

# Log Docker in to the registry. `aws ecr get-login` no longer exists in
# AWS CLI v2; piping get-login-password into `docker login` works with both
# current v1 and v2 and keeps the token off the command line.
aws ecr get-login-password --region "${region}" \
    | docker login --username AWS --password-stdin "${registry}"

# Build the docker image locally with the image name and then push it to ECR
# with the full name.

docker build -q -t "${algorithm_name}" .
docker tag "${algorithm_name}" "${fullname}"

docker push "${fullname}"

## Great! You can now navigate to the ECR service in the console and verify the given registry was created and a container with "fullname" was pushed.

Once you have verified that your docker container was properly pushed to ECR, you can now run some local tests as we did before, but this time, pulling down the freshly built custom container residing in your private registry in ECR. And of course, the container residing on ECR can be accessed (with the proper credentials) and used in other ML workloads, and does not have to be run on Sagemaker.

# Integrated Test

In this test, we'll use a SageMaker Estimator (https://sagemaker.readthedocs.io/en/stable/estimators.html) to encapsulate the docker image published to ECR and start a local test, but this time, using the SageMaker library.

In [None]:
import sagemaker
import json
from sagemaker import get_execution_role

role = get_execution_role()
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()
prefix='BYOM/boston-housing'

## Upload the dataset

In the previous exercise, we prepared the training and validation dataset. Now, we'll upload the CSVs to S3 and share them with an Estimator.

In [None]:
train_path = sagemaker_session.upload_data(path='input/data/train', key_prefix='boston-housing-model/input/train')
test_path = sagemaker_session.upload_data(path='input/data/validation', key_prefix='boston-housing-model/input/validation')
print("Train: %s\nValidation: %s" % (train_path, test_path) )

## Now we can use a Sagemaker Estimator for training and deploying the custom container we've created

In [None]:
# Create the estimator
# 'boston-housing-model:latest' is the local image name:tag built by the bash
# cell above, which also tagged and pushed the same image to ECR.
# NOTE(review): this passes the bare local tag rather than the full ECR URI
# held in `fullname` — confirm that is intended for the 'local' instance type.
# NOTE: this rebinds `boston`, which earlier held the loaded dataset.
boston = sagemaker.estimator.Estimator('boston-housing-model:latest',
                                    role,
                                    instance_count=1, 
                                    instance_type='local',
                                    output_path='s3://{}/{}/output'.format(bucket, prefix))
# Same values as written to input/config/hyperparameters.json for the local test.
hyperparameters = {
    'max_depth': 20,
    'n_estimators': 120
}

print(hyperparameters)
boston.set_hyperparameters(**hyperparameters)

After you call .fit, a new training job will be executed inside the *local Docker daemon* and not in the SageMaker environment.

Note: this is the same workflow for using hosted training and deployment on Sagemaker, however there are a few additional security configurations that are required given the Sagemaker service will be the one requesting the custom docker container from your private registry. You can read more about it and see the sample code to create the necessary **RepositoryAccessMode** and **vpc_config** here: https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-containers-inference-private.html#your-algorithms-containers-inference-private-use

In [None]:
boston.fit({'train': train_path, 'validation': test_path })

The next command will launch a new container in your local Docker daemon. Then you can use the returned predictor for testing your model.

In [None]:
boston_predictor = boston.deploy(initial_instance_count=1, instance_type='local')

Now, let's use the predictor (https://sagemaker.readthedocs.io/en/stable/predictors.html) for some tests.

In [None]:
import pandas as pd
import random
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import CSVDeserializer

# configure the predictor to do everything for us
boston_predictor.serializer = CSVSerializer()
boston_predictor.deserializer = CSVDeserializer()

# load the testing data from the validation csv
validation = pd.read_csv('input/data/validation/testing.csv', header=None)

# cut a sample of (up to) 5 consecutive lines from our dataset and then split
# the label (column 0) from the features.
idx = random.randint(0, max(0, len(validation) - 5))
req = validation.iloc[idx:idx+5].values
X = req[:,1:].tolist()
y = req[:,0].tolist()

# call the local endpoint
for features, label in zip(X, y):
    response = boston_predictor.predict(features)

    # CSVDeserializer returns the response as rows of string fields, so the
    # single predicted value has to be extracted and parsed before it can be
    # compared with the numeric label.
    prediction = float(response[0][0])

    # A regression output will practically never equal the label exactly;
    # report the absolute error instead of an equality test.
    print("RESULT: expected={} predicted={} abs_error={:.4f}".format(
        label, prediction, abs(label - prediction)))

As you can see, the predicted results are close but not perfect. We used several default hyperparameters during the training. Try customizing the hyperparameters and see if you can achieve a more accurate result!

View the hyperparameters here: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html

## Clean Up 

In [None]:
boston_predictor.delete_endpoint()