### Deploy machine learning models to Amazon SageMaker using the ezsmdeploy Python package and a few lines of code

In [29]:
%load_ext autoreload
%autoreload 2

import ezsmdeploy
import pickle

hv = pickle.load(open(os.path.join('LDA','models','hash_vect.pk'), 'rb'))

# def data_process(data):
#     data = [re.sub('[,\\.!?]', '', x) for x in data]
#     # Convert the titles to lowercase
#     data = [x.lower() for x in data]
#     # Remove post with less than 10 words
#     data = [x for x in data if len(x.split(' '))>10]
#     return hv.transform(data)

params = hv.get_params(True)
print(params)



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
{'alternate_sign': False, 'analyzer': 'word', 'binary': False, 'decode_error': 'strict', 'dtype': <class 'numpy.float64'>, 'encoding': 'utf-8', 'input': 'content', 'lowercase': True, 'n_features': 40000, 'ngram_range': (1, 1), 'norm': 'l2', 'preprocessor': None, 'stop_words': 'english', 'strip_accents': None, 'token_pattern': '(?u)\\b\\w\\w+\\b', 'tokenizer': None}


### Step 1 : Write a model transform script

Make sure you have a ...

**"load_model" function**

* input args are model path
* returns loaded model object
* model name is the same as what you saved the model file as (see above step)

**"predict" function**

* input args are the loaded model object and a payload
* returns the result of model.predict
* make sure you format it as a single (or multiple) string return inside a list for real time (for mini batch)
* from a client, a list or string or np.array that is sent for prediction is interpreted as bytes. Do what you have to for converting back to list or string or np.array
* return the error for debugging

In [14]:
%%writefile modelscript_sklearn.py
import sklearn
from joblib import load
import numpy as np
import os
import pickle
import re


hv = pickle.load(open('./hash_vect.pk', 'rb'))

#Return loaded model
def load_model(modelpath):
    print(modelpath)
#     clf = load(os.path.join(modelpath,'model.joblib'))
    lda = pickle.load(open(os.path.join(modelpath,'lda_model_8.pk'), 'rb'))
    print("loaded")
    return lda

def data_process(data):
    data = [re.sub('[,\\.!?]', '', x) for x in data]
    # Convert the titles to lowercase
    data = [x.lower() for x in data]
    # Remove post with less than 10 words
    data = [x for x in data if len(x.split(' '))>10]
    return hv.transform(data)

# return prediction based on loaded model (from the step above) and an input payload
def predict(model, payload):
    try:
        # locally, payload may come in as a list
        if type(payload)==str:
#             payload = data_process(payload)
#             payload = hash_vectorize(payload)
            out = str(model.transform(data_process([payload]))[0])
        # in remote / container based deployment, payload comes in as a stream of bytes
        else:
#             payload = data_process(paylod.decode())
#             payload = hash_vectorize(payload)
            out = str(model.transform(data_process([payload.decode()]))[0])
    except Exception as e:  
        out = [type(payload),str(e)] #useful for debugging!
    
    return out

Overwriting modelscript_sklearn.py


### Does this work locally? (not "in a container locally", but actually in local)¶


In [15]:
from modelscript_sklearn import *
model = load_model('./LDA/models/')
model

./LDA/models/
loaded


LatentDirichletAllocation(batch_size=16384, doc_topic_prior=None,
                          evaluate_every=5, learning_decay=0.7,
                          learning_method='online', learning_offset=10.0,
                          max_doc_update_iter=100, max_iter=10,
                          mean_change_tol=0.001, n_components=8, n_jobs=None,
                          perp_tol=0.1, random_state=None,
                          topic_word_prior=None, total_samples=1000000.0,
                          verbose=1)

### Load some test data

In [16]:
with open('data/Reddit.txt') as f:
    text = f.readlines()
data = [l.replace('\n','') for l in text]
data[3]

'Just stay in there, youre done for tonight'

#### Test model with string input

In [18]:
# data = data_process(data)
print(data[0])
print(predict(model,data[0]))

Weightlifter promised his wife to win an Olympic gold medal before she died in a car accident
[0.0301322  0.3266709  0.14882721 0.0300641  0.03006834 0.37411071
 0.03008058 0.03004597]


#### Test model with byte input

In [19]:
# data = data_process(data)
print(data[0])
print(predict(model,data[0].encode()))

Weightlifter promised his wife to win an Olympic gold medal before she died in a car accident
[0.0301322  0.3266709  0.14882721 0.0300641  0.03006834 0.37411071
 0.03008058 0.03004597]


#### If you have been running other inference containers in local mode, stop existing containers to avoid conflict¶


In [24]:
!docker container stop $(docker container ls -aq) >/dev/null

### Deploy Locally

Note that to include the serializer vectorizer to the the docker container I had to run this script without `image` parameter and then add `hash_vect.pk` to the `src` folder and run `./src/build-docker.sh 1` to create the `ezsmdeploy-image-1` image. 

In [32]:
ezonsm = ezsmdeploy.Deploy(model = 'LDA/models',
                          script = 'modelscript_sklearn.py',
                          requirements = ['numpy','scikit-learn==0.22.1'],
                          instance_type='local',
                          autoscale = True,
                          image='ezsmdeploy-image-1',
                          wait = True)

[K0:01:48.781782 | compressed model(s)
[K0:01:50.301911 | uploaded model tarball(s) ; check returned modelpath
[K0:01:50.302676 | added requirements file
[K0:01:50.306381 | added source file
[K0:01:50.309202 | added Dockerfile
[K0:01:50.311690 | added model_handler and docker utils
[K0:01:51.583682 | created model(s). Now deploying on local
[32m∙∙∙[0m [KAttaching to tmpd_yhynz0_algo-1-jwllp_1
[36malgo-1-jwllp_1  |[0m Starting the inference server with 2 workers.
[32m∙●∙[0m [K[36malgo-1-jwllp_1  |[0m 2020/10/26 18:58:09 [crit] 10#10: *1 connect() to unix:/tmp/gunicorn.sock failed (2: No such file or directory) while connecting to upstream, client: 172.20.0.1, server: , request: "GET /ping HTTP/1.1", upstream: "http://unix:/tmp/gunicorn.sock:/ping", host: "localhost:8080"
[36malgo-1-jwllp_1  |[0m 172.20.0.1 - - [26/Oct/2020:18:58:09 +0000] "GET /ping HTTP/1.1" 502 182 "-" "-"
[36malgo-1-jwllp_1  |[0m [2020-10-26 18:58:09 +0000] [9] [INFO] Starting gunicorn 20.0.4
[3

### Test containerized version locally


In [36]:
# with str
out = ezonsm.predictor.predict(data[0])
print(out)
# with byte stream
out = ezonsm.predictor.predict(data[0].encode())
print(out)

[36malgo-1-jwllp_1  |[0m received input data
[36malgo-1-jwllp_1  |[0m b'Weightlifter promised his wife to win an Olympic gold medal before she died in a car accident'
b'[0.0301322  0.3266709  0.14882721 0.0300641  0.03006834 0.37411071\n 0.03008058 0.03004597]'[36malgo-1-jwllp_1  |[0m predictions from model
[36malgo-1-jwllp_1  |[0m [0.0301322  0.3266709  0.14882721 0.0300641  0.03006834 0.37411071
[36malgo-1-jwllp_1  |[0m  0.03008058 0.03004597]
[36malgo-1-jwllp_1  |[0m 172.20.0.1 - - [26/Oct/2020:18:59:34 +0000] "POST /invocations HTTP/1.1" 200 90 "-" "-"

[36malgo-1-jwllp_1  |[0m received input data
[36malgo-1-jwllp_1  |[0m b'Weightlifter promised his wife to win an Olympic gold medal before she died in a car accident'
[36malgo-1-jwllp_1  |[0m predictions from model
[36malgo-1-jwllp_1  |[0m [0.0301322  0.3266709  0.14882721 0.0300641  0.03006834 0.37411071
[36malgo-1-jwllp_1  |[0m  0.03008058 0.03004597]
b'[0.0301322  0.3266709  0.14882721 0.0300641  

In [37]:
!docker container stop $(docker container ls -aq) >/dev/null

[36malgo-1-jwllp_1  |[0m [2020-10-26 19:00:39 +0000] [9] [INFO] Handling signal: term
[36mtmpd_yhynz0_algo-1-jwllp_1 exited with code 0
[0mAborting on container exit...


### Deploy as Endpoint

In [40]:
ezonsm = ezsmdeploy.Deploy(model = 'LDA/models',
                          script = 'modelscript_sklearn.py',
                          requirements = ['numpy','scikit-learn==0.22.1'],
                          image='629171485058.dkr.ecr.us-east-1.amazonaws.com/ezsmdeploy-image-1',
                          instance_type='ml.t2.medium')


# ezonsm = ezsmdeploy.Deploy(model = ['model.joblib','model.joblib'], # example of multimodel endpoint. 
#                            script = 'modelscript_sklearn.py',
#                            requirements = ['scikit-learn==0.22.1','numpy',], 
#                            instance_type='ml.t2.medium')

[K0:01:47.140494 | compressed model(s)
[K0:01:52.589216 | uploaded model tarball(s) ; check returned modelpath
[K0:01:52.590009 | added requirements file
[K0:01:52.591749 | added source file
[K0:01:52.593101 | added Dockerfile
[K0:01:52.595079 | added model_handler and docker utils
[K0:01:53.713361 | created model(s). Now deploying on ml.t2.medium
[K0:10:27.697488 | deployed model
[K0:10:27.698154 | estimated cost is $0.07 per hour
[K[32m0:10:27.698257 | Done! ✔[0m 


In [48]:
print(data[0])
out = ezonsm.predictor.predict(data[0])
out


Weightlifter promised his wife to win an Olympic gold medal before she died in a car accident


b'[0.0301322  0.3266709  0.14882721 0.0300641  0.03006834 0.37411071\n 0.03008058 0.03004597]'

In [None]:
!pip install ezsmdeploy[locust]

In [42]:
ezonsm.test(input_data=data[0].encode(),usercount=20,hatchrate=10,timeoutsecs=10)

[K0:00:00.001861 | Starting test with Locust
[K0:00:15.074004 | Done! Please see the src folder for locuststats* files
[K

In [43]:
import pandas as pd

pd.read_csv('src/locuststats_stats.csv')

Unnamed: 0,Type,Name,# requests,# failures,Median response time,Average response time,Min response time,Max response time,Average Content Size,Requests/s,...,75%,80%,90%,95%,98%,99%,99.9%,99.99%,99.999,100%
0,sagemaker,predict,395,0,17,19,12,112,0,36.0,...,20,22,27,35,53,81,110,110,110,110
1,,Aggregated,395,0,17,19,12,112,0,36.0,...,20,22,27,35,53,81,110,110,110,110


In [None]:
ezonsm.predictor.delete_endpoint()