## Train a Scikit-Learn Model using SageMaker
#### Bring Your Own Container (BYOC)

### 1. Create Train Script 

In [1]:
%%file train
#!/usr/bin/env python

from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np
import pickle
import os


np.random.seed(123)

# Define paths for Model Training inside Container.
INPUT_PATH = '/opt/ml/input/data'
OUTPUT_PATH = '/opt/ml/output'
MODEL_PATH = '/opt/ml/model'
PARAM_PATH = '/opt/ml/input/config/hyperparameters.json'

# Training data sitting in S3 will be copied to this location during training when used with File MODE.
TRAIN_DATA_PATH = f'{INPUT_PATH}/train'
TEST_DATA_PATH = f'{INPUT_PATH}/test'

def train():
    print("------- [STARTING TRAINING] -------")
    train_df = pd.read_csv(os.path.join(TRAIN_DATA_PATH, 'train.csv'), names=['class', 'bmi', 'diastolic_bp_change', 'systolic_bp_change', 'respiratory_rate'])
    train_df.head()
    X_train = train_df[['bmi', 'diastolic_bp_change', 'systolic_bp_change', 'respiratory_rate']]
    y_train = train_df['class']
    knn = KNeighborsClassifier()
    knn.fit(X_train, y_train)
    # Save the trained Model inside the Container
    with open(os.path.join(MODEL_PATH, 'model.pkl'), 'wb') as out:
        pickle.dump(knn, out)
    print("------- [TRAINING COMPLETE!] -------")
    
    print("------- [STARTING EVALUATION] -------")
    test_df = pd.read_csv(os.path.join(TEST_DATA_PATH, 'test.csv'), names=['class', 'bmi', 'diastolic_bp_change', 'systolic_bp_change', 'respiratory_rate'])
    X_test = train_df[['bmi', 'diastolic_bp_change', 'systolic_bp_change', 'respiratory_rate']]
    y_test = train_df['class']
    acc = knn.score(X_test, y_test)
    print('Accuracy = {:.2f}%'.format(acc * 100))
    print("------- [EVALUATION DONE!] -------")

if __name__ == '__main__':
    train()

Overwriting train


### 2. Create Serve Script

In [2]:
%%file serve
#!/usr/bin/env python

from flask import Flask, Response, request
from io import StringIO
import pandas as pd
import numpy as np
import logging
import pickle
import json
import os


app = Flask(__name__)

MODEL_PATH = '/opt/ml/model'

# Singleton Class for holding the Model
class Predictor:
    model = None
    
    @classmethod
    def load_model(cls):
        print('[LOADING MODEL]')
        if cls.model is None:
            with open(os.path.join(MODEL_PATH, 'model.pkl'), 'rb') as file_:
                cls.model = pickle.load(file_)
        print('MODEL LOADED!')
        return cls.model
    
    @classmethod
    def predict(cls, X):
        X = X.reshape(1, -1)
        clf = cls.load_model()
        return clf.predict(X)

@app.route('/ping', methods=['GET'])
def ping():
    print('[HEALTH CHECK]')
    model = Predictor.load_model()
    status = 200
    if model is None:
        status = 404
    return Response(response={"HEALTH CHECK": "OK"}, status=status, mimetype='application/json')

@app.route('/invocations', methods=['POST'])
def invoke():
    data = None
    if request.content_type == 'application/json':
        data = request.data
        data = json.loads(data.decode('utf8'))
        features = data['instances']
        features = np.array(features)
    else:
        return Response(response='This Predictor only supports JSON data', status=415, mimetype='text/plain')

    prediction = Predictor.predict(features)    
    result = {'predictions': prediction.tolist()}
    result = json.dumps(result, indent=2).encode('utf-8')
    return Response(response=result, status=200, mimetype='application/json')

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)

Overwriting serve


### 3. Build a Docker Image and Push to ECR

<p>Build the docker image and push to ECR and have the image URI handy for the next steps.</p>

#### Build Dockerfile

In [3]:
!docker build -t sagemaker-byoc-sklearn -f Dockerfile .

Sending build context to Docker daemon  115.2kB
Step 1/8 : FROM python:3.7
3.7: Pulling from library/python

[1Bc589d5f9: Pulling fs layer 
[1Be46d8b5f: Pulling fs layer 
[1B8ad42f0d: Pulling fs layer 
[1B137f8d26: Pulling fs layer 
[1Bf6ed9b0c: Pulling fs layer 
[1B279f50e0: Pulling fs layer 
[1B8cd4d4c8: Pulling fs layer 
[1B0f545211: Pulling fs layer 
[1BDigest: sha256:0a2f2121ff7d017e873992ca23ab8516786913cc3cde8270a88051ab6379dd06[5A[2K[5A[2K[5A[2K[6A[2K[5A[2K[3A[2K[4A[2K[3A[2K[2A[2K[9A[2K[1A[2K[9A[2K[5A[2K[9A[2K[5A[2K[9A[2K[5A[2K[9A[2K[5A[2K[9A[2K[5A[2K[9A[2K[5A[2K[9A[2K[5A[2K[9A[2K[5A[2K[5A[2K[9A[2K[9A[2K[9A[2K[9A[2K[9A[2K[8A[2K[8A[2K[8A[2K[8A[2K[7A[2K[7A[2K[7A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K

Every time BEFORE we rebuild the docker image making changes to the training or the inference scripts, run the below commands.

`docker stop $(docker ps -a -q)`<br>
`docker rm $(docker ps -a -q)`<br>
`docker image prune -a`

This is to stop all running docker processes and reflush images.

#### Push local Docker image to ECR (can be SKIPPED since running in Local Mode)

In [4]:
%%sh

# Specify a name to your custom container
container_name=sagemaker-byoc-sklearn
echo "Container Name: " ${container_name}

# Retreive AWS account ID
account=$(aws sts get-caller-identity --query Account --output text)

# Get the AWS region defined in the current configuration (default to us-east-1 if none defined)
region=$(aws configure get region)
region=${region:-us-east-1}

echo "Account: " ${account}
echo "Region: "${region}

repository="${account}.dkr.ecr.${region}.amazonaws.com"
echo "ECR Repository: " ${repository}

image="${account}.dkr.ecr.${region}.amazonaws.com/${container_name}:latest"
echo "ECR Image URI: " ${image}

# If the ECR repository does not exist, create it.
aws ecr describe-repositories --repository-names ${container_name} > /dev/null 2>&1
if [ $? -ne 0 ]
then
aws ecr create-repository --repository-name ${container_name} > /dev/null
fi

# Get the login command from ECR and execute it directly
aws ecr get-login-password --region ${region} | docker login --username AWS --password-stdin ${repository}

# Tag the local image with ECR image name
docker tag ${container_name} ${image}

# Finally, push the local docker image to ECR with the full ECR image name
docker push ${image}

Container Name:  sagemaker-byoc-sklearn
Account:  892313895307
Region: us-east-1
ECR Repository:  892313895307.dkr.ecr.us-east-1.amazonaws.com
ECR Image URI:  892313895307.dkr.ecr.us-east-1.amazonaws.com/sagemaker-byoc-sklearn:latest
Login Succeeded
The push refers to repository [892313895307.dkr.ecr.us-east-1.amazonaws.com/sagemaker-byoc-sklearn]
638ad6f92a0f: Preparing
51c52955af91: Preparing
6ce7ccc1396d: Preparing
f9cdc0b28671: Preparing
90e90bb010df: Preparing
f078a683635a: Preparing
78e4e1f4c63c: Preparing
1aec1a899afd: Preparing
0cd56214ad4c: Preparing
5c4d1446babf: Preparing
a777ce0e8966: Preparing
da2a03e6ee94: Preparing
3e29ce682bef: Preparing
a576cb5bb7d1: Preparing
c1bcddf0e470: Preparing
78e4e1f4c63c: Waiting
1aec1a899afd: Waiting
0cd56214ad4c: Waiting
5c4d1446babf: Waiting
a777ce0e8966: Waiting
da2a03e6ee94: Waiting
3e29ce682bef: Waiting
a576cb5bb7d1: Waiting
c1bcddf0e470: Waiting
f078a683635a: Waiting
f9cdc0b28671: Pushed
6ce7ccc1396d: Pushed
638ad6f92a0f: Pushed
51c5295

https://docs.docker.com/engine/reference/commandline/login/#credentials-store



### 4. Train your Custom Sklearn Model using SageMaker Training

### Imports 

In [13]:
from sagemaker.deserializers import JSONDeserializer
from sagemaker.serializers import JSONSerializer
from time import gmtime, strftime
import pandas as pd
import sagemaker

### Essentials

In [6]:
role = sagemaker.get_execution_role()
session = sagemaker.Session()
account = session.boto_session.client('sts').get_caller_identity()['Account']
region = session.boto_session.region_name
image_name = 'sagemaker-byoc-sklearn'
image_uri = f'{account}.dkr.ecr.{region}.amazonaws.com/{image_name}:latest'  # local copy to be pushed to ECR

### Train (using SageMaker)

In [9]:
WORK_DIRECTORY = '../.././DATA'

train_data_s3_pointer = session.upload_data(f'{WORK_DIRECTORY}/train', key_prefix='byoc-sklearn/train')
test_data_s3_pointer = session.upload_data(f'{WORK_DIRECTORY}/test', key_prefix='byoc-sklearn/test')

In [10]:
model = sagemaker.estimator.Estimator(
    image_uri=image_uri,
    role=role,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    sagemaker_session=None
)

In [11]:
model.fit({'train': train_data_s3_pointer, 'test': test_data_s3_pointer})

2021-03-28 21:02:40 Starting - Starting the training job...
2021-03-28 21:03:06 Starting - Launching requested ML instancesProfilerReport-1616965360: InProgress
.........
2021-03-28 21:04:26 Starting - Preparing the instances for training...
2021-03-28 21:05:08 Downloading - Downloading input data
2021-03-28 21:05:08 Training - Downloading the training image.....[34m------- [STARTING TRAINING] -------[0m
[34m------- [TRAINING COMPLETE!] -------[0m
[34m------- [STARTING EVALUATION] -------[0m
[34mAccuracy = 82.42%[0m
[34m------- [EVALUATION DONE!] -------[0m

2021-03-28 21:06:06 Uploading - Uploading generated training model
2021-03-28 21:06:06 Completed - Training job completed
Training seconds: 68
Billable seconds: 68


### Deploy Trained Model as SageMaker Endpoint

In [14]:
current_timestamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
json_serializer = JSONSerializer()
json_deserializer = JSONDeserializer()
predictor = model.deploy(1, 
                         'ml.m5.xlarge', 
                         endpoint_name=f'emr-byoc-sklearn-{current_timestamp}', 
                         serializer=json_serializer,
                         deserializer=json_deserializer)

-----------!

### Real Time Inference using Deployed Endpoint

In [15]:
df = pd.read_csv('../.././DATA/test/test.csv', header=None)
test_df = df.sample(1)

In [16]:
test_df.drop(test_df.columns[[0]], axis=1, inplace=True)
test_df

Unnamed: 0,1,2,3,4
2241,0.022906,0.179054,0.228029,-0.431359


In [17]:
x = test_df.values
x

array([[ 0.02290556,  0.17905413,  0.22802851, -0.43135912]])

In [18]:
x_ = list(x[0])

In [19]:
x_

[0.02290556213751108,
 0.1790541330317105,
 0.22802851099356705,
 -0.431359120978368]

In [20]:
payload = {'instances': [x_]}
payload

{'instances': [[0.02290556213751108,
   0.1790541330317105,
   0.22802851099356705,
   -0.431359120978368]]}

In [21]:
prediction = predictor.predict(payload)

In [22]:
prediction

{'predictions': [0]}