## Train a Scikit-Learn Model using SageMaker Container Mode
### Bring Your Own Container (BYOC) + SageMaker Serverless Inference

### 1. Create Train Script 

In [None]:
%%file train
#!/usr/bin/env python

from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np
import pickle
import os


# Seed NumPy's global random number generator.
np.random.seed(123)

# Locations used by this script inside the training container.
INPUT_PATH, OUTPUT_PATH, MODEL_PATH = (
    '/opt/ml/input/data',
    '/opt/ml/output',
    '/opt/ml/model',
)
PARAM_PATH = '/opt/ml/input/config/hyperparameters.json'

# In File mode, the training data stored in S3 is copied into these directories for the duration of the job.
TRAIN_DATA_PATH = '/'.join((INPUT_PATH, 'train'))
TEST_DATA_PATH = '/'.join((INPUT_PATH, 'test'))

def train():
    """Train a k-nearest-neighbours classifier and evaluate it on the test set.

    Reads ``train.csv`` from ``TRAIN_DATA_PATH``, fits a
    ``KNeighborsClassifier`` with default settings and pickles it to
    ``model.pkl`` under ``MODEL_PATH``. The model is then scored on
    ``test.csv`` from ``TEST_DATA_PATH`` and the accuracy is printed.

    Both CSV files are read with explicit column names (no header row); the
    first column is the label and the remaining four are the features.
    """
    feature_columns = ['bmi', 'diastolic_bp_change', 'systolic_bp_change', 'respiratory_rate']
    column_names = ['class'] + feature_columns

    print("------- [STARTING TRAINING] -------")
    train_df = pd.read_csv(os.path.join(TRAIN_DATA_PATH, 'train.csv'), names=column_names)
    X_train = train_df[feature_columns]
    y_train = train_df['class']
    knn = KNeighborsClassifier()
    knn.fit(X_train, y_train)
    # Save the trained Model inside the Container
    with open(os.path.join(MODEL_PATH, 'model.pkl'), 'wb') as out:
        pickle.dump(knn, out)
    print("------- [TRAINING COMPLETE!] -------")

    print("------- [STARTING EVALUATION] -------")
    test_df = pd.read_csv(os.path.join(TEST_DATA_PATH, 'test.csv'), names=column_names)
    # Score on the held-out test rows; scoring on the training rows would
    # report training accuracy and leave test.csv unused.
    X_test = test_df[feature_columns]
    y_test = test_df['class']
    acc = knn.score(X_test, y_test)
    print('Accuracy = {:.2f}%'.format(acc * 100))
    print("------- [EVALUATION DONE!] -------")

# Run training only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    train()

### 2. Create Serve Script

In [None]:
%%file serve
#!/usr/bin/env python

from flask import Flask, Response, request
from io import StringIO
import pandas as pd
import logging
import pickle
import os


# Flask application on which the /ping and /invocations routes are registered.
app = Flask(__name__)

# Directory from which the pickled model file (model.pkl) is loaded.
MODEL_PATH = '/opt/ml/model'

# Singleton Class for holding the Model
class Predictor:
    """Class-level holder for the unpickled model, loaded on first use.

    The model is stored on the class itself, so it is read from disk at most
    once per process and shared by every caller.
    """

    # Cached model object; None until load_model() has read it from disk.
    model = None

    @classmethod
    def load_model(cls):
        """Return the cached model, unpickling ``model.pkl`` from MODEL_PATH if needed."""
        print('[LOADING MODEL]')
        if cls.model is None:
            artifact_path = os.path.join(MODEL_PATH, 'model.pkl')
            with open(artifact_path, 'rb') as handle:
                cls.model = pickle.load(handle)
        print('MODEL LOADED!')
        return cls.model

    @classmethod
    def predict(cls, X):
        """Return the model's predictions for the input rows ``X``."""
        return cls.load_model().predict(X)

@app.route('/ping', methods=['GET'])
def ping():
    """Health check: report whether the model can be loaded.

    Returns:
        A JSON response with status 200 when the model is available, or
        status 404 when it is missing or cannot be loaded.
    """
    # Imported locally so this handler does not depend on a module-level import.
    import json

    print('[HEALTH CHECK]')
    try:
        model = Predictor.load_model()
    except Exception:
        # A missing or unreadable model file should be reported as an
        # unhealthy container, not surface as an unhandled server error.
        logging.exception('Health check failed: model could not be loaded')
        model = None

    healthy = model is not None
    # Serialize explicitly: a dict passed as the response body is iterated
    # key by key and does not produce JSON.
    body = json.dumps({"HEALTH CHECK": "OK" if healthy else "FAILED"})
    return Response(response=body, status=200 if healthy else 404, mimetype='application/json')

@app.route('/invocations', methods=['POST'])
def invoke():
    """Run the model on a CSV payload and return the predictions as CSV.

    The request body must be header-less CSV with one record per line.

    Returns:
        A ``text/csv`` response with one prediction per line (status 200), or
        a ``text/plain`` error with status 415 for any other content type.
    """
    # Compare the bare MIME type so that parameters such as
    # "; charset=utf-8" do not cause a valid CSV request to be rejected.
    if request.mimetype != 'text/csv':
        # Use the imported Response class: this file imports names from the
        # flask package, not the package itself, so `flask.Response` is undefined.
        return Response(response='This Predictor only supports CSV data', status=415, mimetype='text/plain')

    # Transform Payload in CSV to Pandas DataFrame.
    data = pd.read_csv(StringIO(request.data.decode('utf-8')), header=None)

    logging.info('Invoked with %d records', data.shape[0])

    predictions = Predictor.predict(data)

    # Convert from numpy back to CSV
    out = StringIO()
    pd.DataFrame({'results': predictions}).to_csv(out, header=False, index=False)

    return Response(response=out.getvalue(), status=200, mimetype='text/csv')

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)

### 3. Build a Docker Image and Push to ECR

<p>Build the Docker image, push it to ECR, and keep the image URI handy for the next steps.</p>

In [None]:
!docker build -t sagemaker-byoc-sklearn -f Dockerfile .

In [None]:
%%sh

# Specify a name to your custom container
container_name=sagemaker-byoc-sklearn
echo "Container Name: " ${container_name}

# Retrieve AWS account ID
account=$(aws sts get-caller-identity --query Account --output text)

# Get the AWS region defined in the current configuration (default to us-east-1 if none defined)
region=$(aws configure get region)
region=${region:-us-east-1}

echo "Account: " ${account}
echo "Region: "${region}

repository="${account}.dkr.ecr.${region}.amazonaws.com"
echo "ECR Repository: " ${repository}

image="${repository}/${container_name}:latest"
echo "ECR Image URI: " ${image}

# If the ECR repository does not exist, create it.
# --region is passed explicitly so these calls target the same region as the
# registry host and the login below, including when the us-east-1 fallback is used.
if ! aws ecr describe-repositories --region "${region}" --repository-names "${container_name}" > /dev/null 2>&1
then
    aws ecr create-repository --region "${region}" --repository-name "${container_name}" > /dev/null
fi

# Authenticate the local Docker client against the ECR registry
aws ecr get-login-password --region "${region}" | docker login --username AWS --password-stdin "${repository}"

# Tag the local image with ECR image name
docker tag "${container_name}" "${image}"

# Finally, push the local docker image to ECR with the full ECR image name
docker push "${image}"

### 4. Train your Custom Sklearn Model using SageMaker Training

### Imports 

In [None]:
from sagemaker.serializers import CSVSerializer
import pandas as pd
import sagemaker

### Essentials

In [None]:
# Execution role and SageMaker session used by the training cells below.
role = sagemaker.get_execution_role()
session = sagemaker.Session()
# Account ID and region are read from the session and used to build the ECR image URI.
account = session.boto_session.client('sts').get_caller_identity()['Account']
region = session.boto_session.region_name
# Must match the repository name used when the image was pushed to ECR.
image_name = 'sagemaker-byoc-sklearn'
image_uri = f'{account}.dkr.ecr.{region}.amazonaws.com/{image_name}:latest'

In [None]:
image_uri

### Train (using SageMaker)

In [None]:
# Local directory that contains the train/ and test/ data folders.
WORK_DIRECTORY = '.././DATA'

# Upload each folder to S3 under its own key prefix; the returned values are
# later passed to fit() as the 'train' and 'test' inputs.
train_data_s3_pointer = session.upload_data(f'{WORK_DIRECTORY}/train', key_prefix='byoc-sklearn/train')
test_data_s3_pointer = session.upload_data(f'{WORK_DIRECTORY}/test', key_prefix='byoc-sklearn/test')

In [None]:
train_data_s3_pointer

In [None]:
test_data_s3_pointer

In [None]:
# Generic estimator that runs the custom container image on one ml.m5.xlarge instance.
model = sagemaker.estimator.Estimator(
    image_uri=image_uri,
    role=role,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    sagemaker_session=session  # reuse the session created in the Essentials cell
)

In [None]:
model.fit({'train': train_data_s3_pointer, 'test': test_data_s3_pointer})

In [None]:
model._current_job_name

### Imports for Inference

In [2]:
from time import gmtime, strftime
import sagemaker
import datetime
import boto3

In [3]:
# Re-create the role, session and image URI so the inference section can be
# run without re-running the training section.
role = sagemaker.get_execution_role()
session = sagemaker.Session()
account = session.boto_session.client('sts').get_caller_identity()['Account']
region = session.boto_session.region_name
# Must match the repository name used when the image was pushed to ECR.
image_name = 'sagemaker-byoc-sklearn'
image_uri = f'{account}.dkr.ecr.{region}.amazonaws.com/{image_name}:latest'
image_uri

'119174016168.dkr.ecr.us-east-1.amazonaws.com/sagemaker-byoc-sklearn:latest'

In [4]:
s3_bucket = session.default_bucket()
s3_bucket

'sagemaker-us-east-1-119174016168'

In [5]:
# Use the session's region rather than a hard-coded one so the client targets
# the same region as image_uri and s3_bucket.
sagemaker_client = boto3.client('sagemaker', region_name=region)

In [None]:
# Name of the training job whose model artifacts will be deployed; replace it with your own job name.
TRAINING_JOB_NAME = 'sagemaker-byoc-sklearn-2022-10-03-17-40-57-525' # Copy this from the AWS SageMaker console
# Alternative when the estimator from the training section is still in memory:
#TRAINING_JOB_NAME = model._current_job_name
# NOTE(review): assumes the job wrote its output to the session's default bucket
# under <job name>/output/ — confirm if a custom output path was configured.
MODEL_ARTIFACTS_LOCATION = f's3://{s3_bucket}/{TRAINING_JOB_NAME}/output/model.tar.gz'

### Re-Create Model

In [None]:
# A UTC timestamp suffix gives each run its own model name.
now_utc = gmtime()
current_timestamp = strftime("%Y-%m-%d-%H-%M-%S", now_utc)
MODEL_NAME = 'clf-byoc-model-' + current_timestamp
MODEL_NAME

In [None]:
# Environment variables passed to the serving container.
BYO_CONTAINER_ENV_VARS = {"SAGEMAKER_CONTAINER_LOG_LEVEL": "20", "SOME_ENV_VAR": "myEnvVar"}

# Single container definition: the custom image plus the trained model artifacts.
container_definition = {
    "Image": image_uri,
    "Mode": "SingleModel",
    "ModelDataUrl": MODEL_ARTIFACTS_LOCATION,
    "Environment": BYO_CONTAINER_ENV_VARS,
}

create_model_response = sagemaker_client.create_model(
    ModelName=MODEL_NAME,
    Containers=[container_definition],
    ExecutionRoleArn=role,
)

print("Model Arn: " + create_model_response["ModelArn"])

### Create Endpoint Config

This is where you can adjust the Serverless Configuration for your endpoint. The current max concurrent invocations for a single endpoint, known as MaxConcurrency, can be any value from 1 to 200, and MemorySize can be any of the following: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.

In [None]:
# A timestamp suffix gives each run its own endpoint configuration name.
ep_config_name = "byoc-serverless-ep" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())

# Serverless settings: memory per invocation environment and the concurrency cap.
serverless_config = {
    "MemorySizeInMB": 4096,
    "MaxConcurrency": 10,
}

# One production variant that serves the model created above.
production_variant = {
    "VariantName": "byoc-serverless",
    "ModelName": MODEL_NAME,
    "ServerlessConfig": serverless_config,
}

endpoint_config_response = sagemaker_client.create_endpoint_config(
    EndpointConfigName=ep_config_name,
    ProductionVariants=[production_variant],
)

print("Endpoint Configuration Arn: " + endpoint_config_response["EndpointConfigArn"])

### Create a Serverless Endpoint

In [None]:
# A timestamp suffix gives each run its own endpoint name.
endpoint_name = "byoc-serverless-ep" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())

# Create the endpoint from the endpoint configuration defined above.
create_endpoint_response = sagemaker_client.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=ep_config_name,
)

print("Endpoint Arn: " + create_endpoint_response["EndpointArn"])

In [None]:
endpoint_name

### Describe the Serverless Endpoint

In [None]:
# Wait until the endpoint leaves the "Creating" state, polling describe_endpoint every 15 seconds.
import time

describe_endpoint_response = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)

while describe_endpoint_response["EndpointStatus"] == "Creating":
    # Sleep before polling again: this avoids an immediate duplicate call
    # after the first describe and a needless wait once creation has finished.
    time.sleep(15)
    describe_endpoint_response = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
    print(describe_endpoint_response["EndpointStatus"])

describe_endpoint_response

### Invoke a Serverless Endpoint 

In [None]:
# Use the session's region rather than a hard-coded one so the runtime client
# targets the region in which the endpoint was created.
sagemaker_runtime = boto3.client("sagemaker-runtime", region_name=region)

In [None]:
%%time

# Send a single header-less CSV record with four feature values to the endpoint.
response = sagemaker_runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    Body=b".345,0.224414,.131102,0.0420",
    ContentType="text/csv",
)
response

In [None]:
# Read the response body, decode it as UTF-8 and strip surrounding whitespace.
predicted_label = response['Body'].read().decode('utf-8').strip()
predicted_label

### Invoke a Serverless Endpoint (Exception Scenario)

In [None]:
# Same call as the previous invocation, but with five values instead of four.
# Any exception raised by the call is caught and printed rather than propagated.
try:
    response = sagemaker_runtime.invoke_endpoint(
        EndpointName=endpoint_name,
        Body=b".345,0.224414,.131102,0.0420,0.3434",
        ContentType="text/csv")
except Exception as e:
    print(e)