## Train a Scikit-Learn Model using SageMaker (Local Mode)
#### Bring Your Own Container (BYOC)

### 1. Create Train Script 

In [20]:
%%file train
#!/usr/bin/env python

from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np
import pickle
import os


np.random.seed(123)

# Define paths for Model Training inside Container.
INPUT_PATH = '/opt/ml/input/data'
OUTPUT_PATH = '/opt/ml/output'
MODEL_PATH = '/opt/ml/model'
PARAM_PATH = '/opt/ml/input/config/hyperparameters.json'

# Training data sitting in S3 will be copied to this location during training when used with File MODE.
TRAIN_DATA_PATH = f'{INPUT_PATH}/train'
TEST_DATA_PATH = f'{INPUT_PATH}/test'

def train():
    print("------- [STARTING TRAINING] -------")
    train_df = pd.read_csv(os.path.join(TRAIN_DATA_PATH, 'train.csv'), names=['class', 'bmi', 'diastolic_bp_change', 'systolic_bp_change', 'respiratory_rate'])
    train_df.head()
    X_train = train_df[['bmi', 'diastolic_bp_change', 'systolic_bp_change', 'respiratory_rate']]
    y_train = train_df['class']
    knn = KNeighborsClassifier()
    knn.fit(X_train, y_train)
    # Save the trained Model inside the Container
    with open(os.path.join(MODEL_PATH, 'model.pkl'), 'wb') as out:
        pickle.dump(knn, out)
    print("------- [TRAINING COMPLETE!] -------")
    
    print("------- [STARTING EVALUATION] -------")
    test_df = pd.read_csv(os.path.join(TEST_DATA_PATH, 'test.csv'), names=['class', 'bmi', 'diastolic_bp_change', 'systolic_bp_change', 'respiratory_rate'])
    X_test = train_df[['bmi', 'diastolic_bp_change', 'systolic_bp_change', 'respiratory_rate']]
    y_test = train_df['class']
    acc = knn.score(X_test, y_test)
    print('Accuracy = {:.2f}%'.format(acc * 100))
    print("------- [EVALUATION DONE!] -------")

if __name__ == '__main__':
    train()

Overwriting train


### 2. Create Serve Script

In [21]:
%%file serve
#!/usr/bin/env python

from flask import Flask, Response, request
from io import StringIO
import pandas as pd
import numpy as np
import logging
import pickle
import json
import os


app = Flask(__name__)

MODEL_PATH = '/opt/ml/model'

# Singleton Class for holding the Model
class Predictor:
    model = None
    
    @classmethod
    def load_model(cls):
        print('[LOADING MODEL]')
        if cls.model is None:
            with open(os.path.join(MODEL_PATH, 'model.pkl'), 'rb') as file_:
                cls.model = pickle.load(file_)
        print('MODEL LOADED!')
        return cls.model
    
    @classmethod
    def predict(cls, X):
        X = X.reshape(1, -1)
        print(f'X: {X}')
        clf = cls.load_model()
        return clf.predict(X)

@app.route('/ping', methods=['GET'])
def ping():
    print('[HEALTH CHECK]')
    model = Predictor.load_model()
    status = 200
    if model is None:
        status = 404
    return Response(response={"HEALTH CHECK": "OK"}, status=status, mimetype='application/json')

@app.route('/invocations', methods=['POST'])
def invoke():
    data = None
    if request.content_type == 'application/json':
        data = request.data
        data = json.loads(data.decode('utf8'))
        features = data['instances']
        features = np.array(features)
    else:
        return Response(response='This Predictor only supports JSON data', status=415, mimetype='text/plain')

    prediction = Predictor.predict(features)
    print('prediction', prediction)
    
    result = {'predictions': prediction.tolist()}
    result = json.dumps(result, indent=2).encode('utf-8')
    return Response(response=result, status=200, mimetype='application/json')

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)

Overwriting serve


### 3. Build a Docker Image and Push to ECR

<p>Build the docker image and push to ECR and have the image URI handy for the next steps.</p>

#### Build Dockerfile

In [22]:
!docker build -t sagemaker-byoc-sklearn -f Dockerfile .

Sending build context to Docker daemon  134.7kB
Step 1/8 : FROM python:3.7
3.7: Pulling from library/python

[1Bc589d5f9: Pulling fs layer 
[1Be46d8b5f: Pulling fs layer 
[1B8ad42f0d: Pulling fs layer 
[1B137f8d26: Pulling fs layer 
[1Bf6ed9b0c: Pulling fs layer 
[1B279f50e0: Pulling fs layer 
[1B8cd4d4c8: Pulling fs layer 
[1B0f545211: Pulling fs layer 
[1B69c80101: Pull complete 165MB/2.165MBB[8A[2K[7A[2K[6A[2K[9A[2K[6A[2K[9A[2K[6A[2K[9A[2K[6A[2K[4A[2K[6A[2K[4A[2K[3A[2K[9A[2K[1A[2K[3A[2K[9A[2K[9A[2K[5A[2K[9A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[9A[2K[5A[2K[5A[2K[9A[2K[5A[2K[5A[2K[5A[2K[9A[2K[8A[2K[8A[2K[8A[2K[8A[2K[7A[2K[7A[2K[7A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[6A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K[5A[2K

Every time BEFORE we rebuild the docker image making changes to the training or the inference scripts, run the below commands.

`docker stop $(docker ps -a -q)`<br>
`docker rm $(docker ps -a -q)`<br>
`docker image prune -a`

This is to stop all running docker processes and reflush images.

#### Push local Docker image to ECR (can be SKIPPED since running in Local Mode)

In [None]:
%%sh

# Specify a name to your custom container
container_name=sagemaker-byoc-sklearn
echo "Container Name: " ${container_name}

# Retreive AWS account ID
account=$(aws sts get-caller-identity --query Account --output text)

# Get the AWS region defined in the current configuration (default to us-east-1 if none defined)
region=$(aws configure get region)
region=${region:-us-east-1}

echo "Account: " ${account}
echo "Region: "${region}

repository="${account}.dkr.ecr.${region}.amazonaws.com"
echo "ECR Repository: " ${repository}

image="${account}.dkr.ecr.${region}.amazonaws.com/${container_name}:latest"
echo "ECR Image URI: " ${image}

# If the ECR repository does not exist, create it.
aws ecr describe-repositories --repository-names ${container_name} > /dev/null 2>&1
if [ $? -ne 0 ]
then
aws ecr create-repository --repository-name ${container_name} > /dev/null
fi

# Get the login command from ECR and execute it directly
aws ecr get-login-password --region ${region} | docker login --username AWS --password-stdin ${repository}

# Tag the local image with ECR image name
docker tag ${container_name} ${image}

# Finally, push the local docker image to ECR with the full ECR image name
docker push ${image}

### 4. Train your Custom Sklearn Model using SageMaker Training

### Imports 

In [23]:
from sagemaker.deserializers import JSONDeserializer
from sagemaker.serializers import JSONSerializer
import pandas as pd
import sagemaker

### Essentials

In [24]:
role = sagemaker.get_execution_role()
session = sagemaker.Session()
account = session.boto_session.client('sts').get_caller_identity()['Account']
region = session.boto_session.region_name
image_name = 'sagemaker-byoc-sklearn'
#image_uri = f'{account}.dkr.ecr.{region}.amazonaws.com/{image_name}:latest'  # local copy to be pushed to ECR
image_uri = f'{image_name}:latest' # refer to the local docker image 

### Train (Local Mode)

In [40]:
model = sagemaker.estimator.Estimator(
    image_uri=image_uri,
    role=role,
    instance_count=1,
    instance_type='local',
    sagemaker_session=None
)

In [41]:
model.fit({'train': 'file://../.././DATA/train/train.csv', 'test': 'file://../.././DATA/test/test.csv'})

Creating 2gh92nlho5-algo-1-vhzza ... 
Creating 2gh92nlho5-algo-1-vhzza ... done
Attaching to 2gh92nlho5-algo-1-vhzza
[36m2gh92nlho5-algo-1-vhzza |[0m ------- [STARTING TRAINING] -------
[36m2gh92nlho5-algo-1-vhzza |[0m ------- [TRAINING COMPLETE!] -------
[36m2gh92nlho5-algo-1-vhzza |[0m ------- [STARTING EVALUATION] -------
[36m2gh92nlho5-algo-1-vhzza |[0m Accuracy = 82.42%
[36m2gh92nlho5-algo-1-vhzza |[0m ------- [EVALUATION DONE!] -------
[36m2gh92nlho5-algo-1-vhzza exited with code 0
[0mAborting on container exit...
===== Job Complete =====


### Deploy Trained Model as SageMaker Endpoint

In [42]:
json_serializer = JSONSerializer()
json_deserializer = JSONDeserializer()
predictor = model.deploy(1, 
                         'local', 
                         endpoint_name='emr-byoc-sklearn', 
                         serializer=json_serializer,
                         deserializer=json_deserializer)

Attaching to ham81dqy28-algo-1-t81d8
[36mham81dqy28-algo-1-t81d8 |[0m  * Serving Flask app "serve" (lazy loading)
[36mham81dqy28-algo-1-t81d8 |[0m  * Environment: production
[36mham81dqy28-algo-1-t81d8 |[0m [2m   Use a production WSGI server instead.[0m
[36mham81dqy28-algo-1-t81d8 |[0m  * Debug mode: off
[36mham81dqy28-algo-1-t81d8 |[0m  * Running on http://0.0.0.0:8080/ (Press CTRL+C to quit)
[36mham81dqy28-algo-1-t81d8 |[0m [HEALTH CHECK]
[36mham81dqy28-algo-1-t81d8 |[0m [LOADING MODEL]
[36mham81dqy28-algo-1-t81d8 |[0m MODEL LOADED!
[36mham81dqy28-algo-1-t81d8 |[0m 172.18.0.1 - - [28/Mar/2021 16:36:21] "[37mGET /ping HTTP/1.1[0m" 200 -
!

### Real Time Inference using Deployed Endpoint

In [43]:
df = pd.read_csv('../.././DATA/test/test.csv', header=None)
test_df = df.sample(1)

In [44]:
test_df.drop(test_df.columns[[0]], axis=1, inplace=True)
test_df

Unnamed: 0,1,2,3,4
1586,-1.161747,-0.758273,0.394461,0.481227


In [45]:
x = test_df.values
x

array([[-1.16174745, -0.75827256,  0.3944615 ,  0.48122704]])

In [46]:
x_ = list(x[0])

In [47]:
x_

[-1.1617474484872234,
 -0.7582725561441886,
 0.3944614964772361,
 0.4812270430077439]

In [48]:
payload = {'instances': [x_]}
payload

{'instances': [[-1.1617474484872234,
   -0.7582725561441886,
   0.3944614964772361,
   0.4812270430077439]]}

In [49]:
prediction = predictor.predict(payload)

[36mham81dqy28-algo-1-t81d8 |[0m X: [[-1.16174745 -0.75827256  0.3944615   0.48122704]]
[36mham81dqy28-algo-1-t81d8 |[0m [LOADING MODEL]
[36mham81dqy28-algo-1-t81d8 |[0m MODEL LOADED!
[36mham81dqy28-algo-1-t81d8 |[0m prediction [0]
[36mham81dqy28-algo-1-t81d8 |[0m 172.18.0.1 - - [28/Mar/2021 16:36:24] "[37mPOST /invocations HTTP/1.1[0m" 200 -


In [50]:
prediction

{'predictions': [0]}