## Train a Scikit-Learn Model using SageMaker Container Mode
#### Bring Your Own Container (BYOC)

### 1. Create Train Script 

In [2]:
%%file train
#!/usr/bin/env python

from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np
import pickle
import os


np.random.seed(123)

# Define paths for Model Training inside Container.
INPUT_PATH = '/opt/ml/input/data'
OUTPUT_PATH = '/opt/ml/output'
MODEL_PATH = '/opt/ml/model'
PARAM_PATH = '/opt/ml/input/config/hyperparameters.json'

# Training data sitting in S3 will be copied to this location during training when used with File MODE.
TRAIN_DATA_PATH = f'{INPUT_PATH}/train'
TEST_DATA_PATH = f'{INPUT_PATH}/test'

def train():
    print("------- [STARTING TRAINING] -------")
    train_df = pd.read_csv(os.path.join(TRAIN_DATA_PATH, 'train.csv'), names=['class', 'bmi', 'diastolic_bp_change', 'systolic_bp_change', 'respiratory_rate'])
    train_df.head()
    X_train = train_df[['bmi', 'diastolic_bp_change', 'systolic_bp_change', 'respiratory_rate']]
    y_train = train_df['class']
    knn = KNeighborsClassifier()
    knn.fit(X_train, y_train)
    # Save the trained Model inside the Container
    with open(os.path.join(MODEL_PATH, 'model.pkl'), 'wb') as out:
        pickle.dump(knn, out)
    print("------- [TRAINING COMPLETE!] -------")
    
    print("------- [STARTING EVALUATION] -------")
    test_df = pd.read_csv(os.path.join(TEST_DATA_PATH, 'test.csv'), names=['class', 'bmi', 'diastolic_bp_change', 'systolic_bp_change', 'respiratory_rate'])
    X_test = train_df[['bmi', 'diastolic_bp_change', 'systolic_bp_change', 'respiratory_rate']]
    y_test = train_df['class']
    acc = knn.score(X_test, y_test)
    print('Accuracy = {:.2f}%'.format(acc * 100))
    print("------- [EVALUATION DONE!] -------")

if __name__ == '__main__':
    train()

Overwriting train


### 2. Create Serve Script

In [2]:
%%file serve
#!/usr/bin/env python

from flask import Flask, Response, request
from io import StringIO
import pandas as pd
import logging
import pickle
import json
import os


app = Flask(__name__)

MODEL_PATH = '/opt/ml/model'

# Singleton Class for holding the Model
class Predictor:
    model = None
    
    @classmethod
    def load_model(cls):
        print('[LOADING MODEL]')
        if cls.model is None:
            with open(os.path.join(MODEL_PATH, 'model.pkl'), 'rb') as file_:
                cls.model = pickle.load(file_)
        print('MODEL LOADED!')
        return cls.model
    
    @classmethod
    def predict(cls, X):
        clf = cls.load_model()
        return clf.predict(X)

@app.route('/ping', methods=['GET'])
def ping():
    print('[HEALTH CHECK]')
    model = Predictor.load_model()
    status = 200
    if model is None:
        status = 404
    return Response(response={"HEALTH CHECK": "OK"}, status=status, mimetype='application/json')

@app.route('/invocations', methods=['POST'])
def invoke():
    data = None

    # Transform Payload in CSV to Pandas DataFrame.
    if request.content_type == 'application/JSON':
        data = request.data.decode('utf-8')
        data = StringIO(data)
        data = pd.read_csv(data, header=None)
        # TODO add logic to read incoming payload in JSON
        # TODO
    else:
        return flask.Response(response='This Predictor only supports JSON data', status=415, mimetype='text/plain')

    logging.info('Invoked with {} records'.format(data.shape[0]))
    
    predictions = Predictor.predict(data)

    # Convert from numpy back to JSON
    out = StringIO()
    pd.DataFrame({'results': predictions}).to_csv(out, header=False, index=False)
    result = out.getvalue()

    return Response(response=result, status=200, mimetype='application/JSON')

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)

Overwriting serve


### 3. Build a Docker Image and Push to ECR

<p>Build the docker image and push to ECR and have the image URI handy for the next steps.</p>

#### Build Dockerfile

In [5]:
!docker build -t sagemaker-byoc-sklearn -f Dockerfile .

Sending build context to Docker daemon  41.47kB
Step 1/8 : FROM python:3.7
3.7: Pulling from library/python

[1B22b926a1: Pulling fs layer 
[1B09ae8373: Pulling fs layer 
[1Be3daef68: Pulling fs layer 
[1B244fe254: Pulling fs layer 
[1Bbed20e89: Pulling fs layer 
[1B03a5a371: Pulling fs layer 
[1B42025f7f: Pulling fs layer 
[1B766649b2: Pulling fs layer 
[1BDigest: sha256:ed9c421e77eab107bb9ad431fa734f5407ebf8b241dcab2c22a7cfc76e9139f0[6A[2K[9A[2K[6A[2K[6A[2K[9A[2K[5A[2K[2A[2K[1A[2K[3A[2K[3A[2K[9A[2K[5A[2K[9A[2K[5A[2K[9A[2K[5A[2K[9A[2K[5A[2K[9A[2K[5A[2K[9A[2K[5A[2K[9A[2K[5A[2K[9A[2K[9A[2K[5A[2K[9A[2K[5A[2K[9A[2K[8A[2K[8A[2K[8A[2K[7A[2K[7A[2K[7A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K

#### Push local Docker image to ECR

In [6]:
%%sh

# Specify a name to your custom container
container_name=sagemaker-byoc-sklearn
echo "Container Name: " ${container_name}

# Retreive AWS account ID
account=$(aws sts get-caller-identity --query Account --output text)

# Get the AWS region defined in the current configuration (default to us-east-1 if none defined)
region=$(aws configure get region)
region=${region:-us-east-1}

echo "Account: " ${account}
echo "Region: "${region}

repository="${account}.dkr.ecr.${region}.amazonaws.com"
echo "ECR Repository: " ${repository}

image="${account}.dkr.ecr.${region}.amazonaws.com/${container_name}:latest"
echo "ECR Image URI: " ${image}

# If the ECR repository does not exist, create it.
aws ecr describe-repositories --repository-names ${container_name} > /dev/null 2>&1
if [ $? -ne 0 ]
then
aws ecr create-repository --repository-name ${container_name} > /dev/null
fi

# Get the login command from ECR and execute it directly
aws ecr get-login-password --region ${region} | docker login --username AWS --password-stdin ${repository}

# Tag the local image with ECR image name
docker tag ${container_name} ${image}

# Finally, push the local docker image to ECR with the full ECR image name
docker push ${image}

Container Name:  sagemaker-byoc-sklearn
Account:  892313895307
Region: us-east-1
ECR Repository:  892313895307.dkr.ecr.us-east-1.amazonaws.com
ECR Image URI:  892313895307.dkr.ecr.us-east-1.amazonaws.com/sagemaker-byoc-sklearn:latest
Login Succeeded
The push refers to repository [892313895307.dkr.ecr.us-east-1.amazonaws.com/sagemaker-byoc-sklearn]
f7f117824127: Preparing
a00d2ba51cb5: Preparing
b1523eeca3f0: Preparing
8e3105203750: Preparing
6cb5a55ce424: Preparing
f078a683635a: Preparing
0b18c63fe124: Preparing
abb35d8edc01: Preparing
2cdb72475c99: Preparing
04d1717d0e01: Preparing
dacb447ffe30: Preparing
bde301416dd2: Preparing
81496d8c72c2: Preparing
644448d6e877: Preparing
0e41e5bdb921: Preparing
04d1717d0e01: Waiting
dacb447ffe30: Waiting
f078a683635a: Waiting
bde301416dd2: Waiting
81496d8c72c2: Waiting
0b18c63fe124: Waiting
644448d6e877: Waiting
0e41e5bdb921: Waiting
abb35d8edc01: Waiting
2cdb72475c99: Waiting
a00d2ba51cb5: Pushed
b1523eeca3f0: Pushed
8e3105203750: Pushed
f7f1178

https://docs.docker.com/engine/reference/commandline/login/#credentials-store



### 4. Train your Custom Sklearn Model using SageMaker Training

### Imports 

In [7]:
from sagemaker.serializers import JSONSerializer
import pandas as pd
import sagemaker

### Essentials

In [8]:
role = sagemaker.get_execution_role()
session = sagemaker.Session()
account = session.boto_session.client('sts').get_caller_identity()['Account']
region = session.boto_session.region_name
image_name = 'sagemaker-byoc-sklearn'
image_uri = f'{account}.dkr.ecr.{region}.amazonaws.com/{image_name}:latest'

### Train (using SageMaker)

In [10]:
WORK_DIRECTORY = '../.././DATA'

train_data_s3_pointer = session.upload_data(f'{WORK_DIRECTORY}/train', key_prefix='byoc-sklearn/train')
test_data_s3_pointer = session.upload_data(f'{WORK_DIRECTORY}/test', key_prefix='byoc-sklearn/test')

In [11]:
train_data_s3_pointer

's3://sagemaker-us-east-1-892313895307/byoc-sklearn/train'

In [12]:
test_data_s3_pointer

's3://sagemaker-us-east-1-892313895307/byoc-sklearn/test'

In [13]:
model = sagemaker.estimator.Estimator(
    image_uri=image_uri,
    role=role,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    sagemaker_session=session  # ensure the session is set to session
)

In [14]:
model.fit({'train': train_data_s3_pointer, 'test': test_data_s3_pointer})

2021-03-26 17:48:39 Starting - Starting the training job...
2021-03-26 17:48:41 Starting - Launching requested ML instancesProfilerReport-1616780918: InProgress
.........
2021-03-26 17:50:33 Starting - Preparing the instances for training...
2021-03-26 17:51:07 Downloading - Downloading input data...
2021-03-26 17:51:33 Training - Downloading the training image...
2021-03-26 17:52:05 Uploading - Uploading generated training model[34m------- [STARTING TRAINING] -------[0m
[34m------- [TRAINING COMPLETE!] -------[0m
[34m------- [STARTING EVALUATION] -------[0m
[34mAccuracy = 82.42%[0m
[34m------- [EVALUATION DONE!] -------[0m

2021-03-26 17:52:33 Completed - Training job completed
Training seconds: 65
Billable seconds: 65


### Deploy Trained Model as SageMaker Endpoint

In [10]:
json_serializer = JSONSerializer()
predictor = model.deploy(1, 'ml.m5.xlarge', 
                         endpoint_name='emr-byoc-sklearn', 
                         serializer=json_serializer)

-------------!

### Real Time Inference using Deployed Endpoint

In [11]:
df = pd.read_csv('.././DATA/test/test.csv', header=None)
test_df = df.sample(1)

In [12]:
test_df.drop(test_df.columns[[0]], axis=1, inplace=True)
test_df

Unnamed: 0,1,2,3,4
1608,0.733637,0.347981,0.228029,0.162324


In [13]:
test_df.values

array([[0.73363737, 0.3479813 , 0.22802851, 0.16232361]])

In [14]:
prediction = predictor.predict(test_df.values).decode('utf-8').strip()

In [15]:
prediction

'0'