## Voting ensembles in Sklearn

In [1]:
!rm modelscript_ensemble_sklearn.py
!rm *.joblib

In [2]:
# Voting Ensemble for Classification
import pandas
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier

# Get Data
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pandas.read_csv(url, names=names)
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]

# Fit
kfold = model_selection.KFold(n_splits=10)
# create the sub models
estimators = []
model1 = LogisticRegression(solver='newton-cg')
estimators.append(('logistic', model1))
model2 = DecisionTreeClassifier()
estimators.append(('cart', model2))
model3 = SVC()
estimators.append(('svm', model3))
# create the ensemble model
ensemble = VotingClassifier(estimators)

ensemble.fit(X,Y)
results = model_selection.cross_val_score(ensemble, X, Y, cv=kfold)
print(results.mean())

0.7617053998632947


### Save model files

In [3]:
!pip install joblib -q
from joblib import dump

You should consider upgrading via the '/home/ec2-user/anaconda3/envs/python3/bin/python -m pip install --upgrade pip' command.[0m


In [4]:
for estimator in ensemble.estimators:
    print(estimator[0])
    dump(estimator[1],estimator[0]+'.joblib')

dump(ensemble,'ensemble.joblib')

logistic
cart
svm


['ensemble.joblib']

In [5]:
!ls *.joblib

cart.joblib  ensemble.joblib  logistic.joblib  svm.joblib


In [6]:
ensemble.predict(X[0].reshape(1,8))

array([1.])

## Step 1 : Write a model transform script

#### Make sure you have a ...

- "load_model" function
    - input args are model path
    - returns loaded model object
    - model name is the same as what you saved the model file as (see above step)
<br><br>
- "predict" function
    - input args are the loaded model object and a payload
    - returns the result of model.predict
    - make sure you format it as a single (or multiple) string return inside a list for real time (for mini batch)
    - from a client, a list  or string or np.array that is sent for prediction is interpreted as bytes. Do what you have to for converting back to list or string or np.array
    - return the error for debugging


In [7]:
%%writefile modelscript_ensemble_sklearn.py
import sklearn
from joblib import load
import numpy as np
import os

#Return loaded model
def load_model(modelpath):
    print(modelpath)
    
    # Either load individually
    print("loading individuals")
    logistic = load(os.path.join(modelpath,'logistic.joblib'))
    cart = load(os.path.join(modelpath,'cart.joblib'))
    svm = load(os.path.join(modelpath,'svm.joblib'))
    
    # Or load the entire ensemble
    print("loading ensemble")
    ensemble = load(os.path.join(modelpath,'ensemble.joblib'))
    print("loaded")
    return ensemble

# return prediction based on loaded model (from the step above) and an input payload
def predict(model, payload):
    try:
        # locally, payload may come in as an np.ndarray
        if type(payload)==np.ndarray:
            out = [str(model.predict(payload.reshape((1,8))))]
        # in remote / container based deployment, payload comes in as a stream of bytes
        else:
            out = [str(model.predict(np.frombuffer(payload[0]['body']).reshape((1,8))))]
    except Exception as e:
        out = [type(payload),str(e)] #useful for debugging!
    
    return out

Writing modelscript_ensemble_sklearn.py


## Does this work locally? (not "_in a container locally_", but _actually_ in local)

In [8]:
from modelscript_ensemble_sklearn import *
model = load_model('.')

.
loading individuals
loading ensemble
loaded


In [9]:
predict(model,X[0])

['[1.]']

### ok great! Now let's install ezsmdeploy
In some cases, installs fail due to an existing package installed called greenlet.
This is not a direct dependency of ezsmdeploy but interferes with the installation. 
To fix this, either install in a virtualenv as seen above, or do:
pip install ezsmdeploy[locust] --ignore-installed greenlet

In [10]:
!pip install ezsmdeploy

You should consider upgrading via the '/home/ec2-user/anaconda3/envs/python3/bin/python -m pip install --upgrade pip' command.[0m


In [11]:
import ezsmdeploy

#### If you have been running other inference containers in local mode, stop existing containers to avoid conflict

In [12]:
!docker container stop $(docker container ls -aq) >/dev/null

"docker container stop" requires at least 1 argument.
See 'docker container stop --help'.

Usage:  docker container stop [OPTIONS] CONTAINER [CONTAINER...]

Stop one or more running containers


## Deploy locally

#### tar all models

In [14]:
!rm model.tar.gz
!find ./ -name "*.joblib" | tar -czf model.tar.gz -T -

In [16]:
ez = ezsmdeploy.Deploy(model = 'model.tar.gz', # if you intend to add models later, pass model as list, otherwise str
                  script = 'modelscript_ensemble_sklearn.py',
                  requirements = ['scikit-learn=='+sklearn.__version__,'numpy','joblib'], #or pass in the path to requirements.txt
                  instance_type = 'local',
                  wait = True)

[K0:00:00.003312 | compressed model(s)
[K0:00:00.112983 | uploaded model tarball(s) ; check returned modelpath
[K0:00:00.113810 | added requirements file
[K0:00:00.115425 | added source file
[K0:00:00.116838 | added Dockerfile
[K0:00:00.118620 | added model_handler and docker utils
[K0:00:00.118698 | building docker container
[K0:00:50.047039 | built docker container
[K0:00:50.181797 | created model(s). Now deploying on local
[32m∙●∙[0m [K

Exception in thread Thread-5:
Traceback (most recent call last):
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/sagemaker/local/image.py", line 618, in run
    _stream_output(self.process)
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/sagemaker/local/image.py", line 677, in _stream_output
    raise RuntimeError("Process exited with code: %s" % exit_code)
RuntimeError: Process exited with code: 1

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/sagemaker/local/image.py", line 623, in run
    raise RuntimeError(msg)
RuntimeError: Failed to run: ['docker-compose', '-f', '/tmp/tmptf3652jw/docker-compose.yaml', 'up', '--build', '--abort-on-container-exit'], Process exited with code: 1



[K2m●∙∙[0m [K

RuntimeError: Giving up, endpoint didn't launch correctly

## Test containerized version locally

In [None]:
out = ez.predictor.predict(digits.data[-1:].tobytes()).decode()
out

In [None]:
!docker container stop $(docker container ls -aq) >/dev/null

## Deploy on SageMaker

In [17]:
ezonsm = ezsmdeploy.Deploy(model = 'model.tar.gz', 
                  script = 'modelscript_ensemble_sklearn.py',
                  requirements = ['scikit-learn=='+sklearn.__version__,'numpy','joblib'], #or pass in the path to requirements.txt
                  instance_type = 'ml.m4.xlarge',
                  wait = True)

[K0:00:00.003374 | compressed model(s)
[K0:00:00.099858 | uploaded model tarball(s) ; check returned modelpath
[K0:00:00.100601 | added requirements file
[K0:00:00.102223 | added source file
[K0:00:00.103464 | added Dockerfile
[K0:00:00.105355 | added model_handler and docker utils
[K0:00:00.105437 | building docker container
[K0:00:53.762390 | built docker container
[K0:00:53.882429 | created model(s). Now deploying on ml.m4.xlarge
[K2m∙∙∙[0m [K-*

UnexpectedStatusException: Error hosting endpoint ezsmdeploy-endpoint-u6xhkurqpstyh4w3qdvgmr: Failed. Reason:  The image '497456752804.dkr.ecr.us-east-1.amazonaws.com/ezsmdeploy-image-u6xhkurqpstyh4w3qdvgmr' does not exist..

In [22]:
!./src/build-docker.sh test

Building container ezsmdeploy-image-test
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
sha256:2c92f87b3c2261c33d5d611cc244b3114e71fbb054c311290376fc711bf78dbc
The push refers to repository [497456752804.dkr.ecr.us-east-1.amazonaws.com/ezsmdeploy-image-test]

[1B99479e1c: Preparing 
[1B4529524e: Preparing 
[1Bf6d11659: Preparing 
[1B283711ad: Preparing 
[1Be385d4fd: Preparing 
[1Bf8484ab4: Preparing 
[1B52bb8197: Preparing 
[1B818f1f96: Preparing 
[1B2392e386: Preparing 
[4B52bb8197: Pushed   578.7MB/570.1MB[8A[2K[7A[2K[7A[2K[7A[2K[10A[2K[7A[2K[5A[2K[9A[2K[3A[2K[5A[2K[2A[2K[5A[2K[4A[2K[5A[2K[7A[2K[7A[2K[1A[2K[2A[2K[7A[2K[4A[2K[7A[2K[5A[2K[7A[2K[1A[2K[4A[2K[1A[2K[4A[2K[1A[2K[7A[2K[1A[2K[5A[2K[1A[2K[5A[2K[1A[2K[5A[2K[1A[2K[4A[2K[4A[2K[5A[2K[4A[2K[5A[2K[4A[2K[1A[2K[4A[2K[5A[2K[7A[2K[4A[2K[1A[2K[4A[2K[5A[2K[4A[2K[5A[2K[1A[2K[4A

In [None]:
out = ezonsm.predictor.predict(digits.data[-1:].tobytes(),target_model='model1.tar.gz').decode() 
out

### Install the additional locust testing functionality to enable automated testing

In [None]:
!pip install ezsmdeploy[locust]

In [None]:
ezonsm.test(input_data=digits.data[-1:].tobytes(), target_model='model1.tar.gz',usercount=20,hatchrate=10,timeoutsecs=10)

In [None]:
import pandas as pd

pd.read_csv('src/locuststats_stats.csv')

In [None]:
ezonsm.predictor.delete_endpoint()