## Voting ensembles in Sklearn

In [3]:
# Voting Ensemble for Classification
import pandas
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier

# Get Data
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pandas.read_csv(url, names=names)
array = dataframe.values
X = array[:,0:8]  # first eight columns are the features
Y = array[:,8]    # last column ('class') is the label

# Fit
# Fixed seed for the decision tree so that re-running the notebook
# reproduces the same cross-validation score.
SEED = 7
kfold = model_selection.KFold(n_splits=10)
# create the sub models
model1 = LogisticRegression(solver='newton-cg')
model2 = DecisionTreeClassifier(random_state=SEED)
model3 = SVC()
estimators = [
    ('logistic', model1),
    ('cart', model2),
    ('svm', model3),
]
# create the ensemble model
ensemble = VotingClassifier(estimators)

# Mean accuracy over the 10 folds
results = model_selection.cross_val_score(ensemble, X, Y, cv=kfold)
print(results.mean())

0.7669343814080657


### Save model files

In [8]:
# Quietly make sure joblib is installed, then import `dump`,
# which is used below to serialize the models to disk.
!pip install joblib -q
from joblib import dump

You should consider upgrading via the '/home/ec2-user/anaconda3/envs/python3/bin/python -m pip install --upgrade pip' command.[0m


In [11]:
# cross_val_score only fits clones of the ensemble, so fit it on the full
# dataset first; otherwise the saved files would contain untrained models.
ensemble.fit(X, Y)

# After fitting, the trained sub-models are exposed through named_estimators_
# (ensemble.estimators still holds the untrained originals).
for name, fitted_model in ensemble.named_estimators_.items():
    print(name)
    dump(fitted_model, name + '.joblib')

dump(ensemble,'ensemble.joblib')

logistic
cart
svm


['ensemble.joblib']

In [12]:
# List the serialized model files in the working directory
!ls *.joblib

cart.joblib  ensemble.joblib  logistic.joblib  model.joblib  svm.joblib


## Step 1 : Write a model transform script

#### Make sure you have a ...

- "load_model" function
    - input args are model path
    - returns loaded model object
    - model name is the same as what you saved the model file as (see above step)
<br><br>
- "predict" function
    - input args are the loaded model object and a payload
    - returns the result of model.predict
    - make sure you format the result as a list of strings: a single string for real-time requests, or multiple strings for mini-batch requests
    - a list, string, or np.array sent by a client arrives as bytes, so convert the payload back to a list, string, or np.array before calling the model
    - if prediction fails, return the error message to help with debugging


In [None]:
%%writefile modelscript_sklearn.py
import sklearn
from joblib import load
import numpy as np
import os

#Return loaded model
def load_model(modelpath):
    """Load the voting ensemble stored as ``ensemble.joblib``.

    Args:
        modelpath: Directory that contains ``ensemble.joblib``.

    Returns:
        The object deserialized from ``ensemble.joblib``.
    """
    print(modelpath)

    # Only the ensemble artifact is loaded. The separate logistic/cart/svm
    # files were read but never used, and reading them made loading fail
    # whenever they were not packaged next to the ensemble.
    ensemble = load(os.path.join(modelpath,'ensemble.joblib'))
    print("loaded")
    return ensemble

# return prediction based on loaded model (from the step above) and an input payload
def predict(model, payload):
    """Run ``model.predict`` on one sample and return the result as strings.

    Args:
        model: Object returned by ``load_model``; must expose ``predict``.
        payload: Either a numpy array holding one sample (local testing), or
            the container request: a sequence whose first item is a mapping
            with the raw float64 bytes of one sample under the ``'body'`` key.

    Returns:
        ``[str(prediction)]`` on success. On any failure, a two-item list
        ``[str(type(payload)), str(error)]`` so the caller can see what went
        wrong instead of receiving an exception.
    """
    try:
        # locally, payload may come in as an np.ndarray
        if isinstance(payload, np.ndarray):
            # Convert by value rather than reinterpreting the raw buffer, so
            # integer or float32 arrays are not misread as float64 bytes.
            features = np.asarray(payload, dtype=np.float64)
        # in remote / container based deployment, payload comes in as a stream of bytes
        else:
            features = np.frombuffer(payload[0]['body'], dtype=np.float64)
        # One row with however many features were sent; the feature count is
        # not hard-coded so the script works for any single-sample payload.
        out = [str(model.predict(features.reshape(1, -1)))]
    except Exception as e:
        # Both items are strings so the response stays a list of strings.
        out = [str(type(payload)), str(e)] #useful for debugging!

    return out

## Does this work locally? (not "_in a container locally_", but _actually_ in local)

In [None]:
# Import only the two entry points rather than `*`, so it is clear where
# `load_model` and `predict` come from and no other names leak in.
from modelscript_sklearn import load_model, predict
model = load_model('.')

In [None]:
# Use the last row of the training features (8 values) as a sample payload
predict(model,X[-1:])[0]

### OK, great! Now let's install ezsmdeploy
In some cases, the install fails because of an already-installed package called greenlet.
It is not a direct dependency of ezsmdeploy, but it interferes with the installation.
To fix this, either install in a virtualenv as seen above, or run:
`pip install ezsmdeploy[locust] --ignore-installed greenlet`

In [None]:
# Install the ezsmdeploy package with pip
!pip install ezsmdeploy

In [None]:
import ezsmdeploy

#### If you have been running other inference containers in local mode, stop existing containers to avoid conflict

In [None]:
# WARNING: `docker container ls -aq` lists every container on this host, so
# this stops all of them, not only earlier inference containers.
# Standard output is discarded.
!docker container stop $(docker container ls -aq) >/dev/null

## Deploy locally

In [None]:
# Package the artifact that modelscript_sklearn.load_model actually reads
# ('ensemble.joblib', saved earlier in this notebook).
ez = ezsmdeploy.Deploy(model = ['ensemble.joblib'], # if you intend to add models later, pass model as list, otherwise str
                  script = 'modelscript_sklearn.py',
                  requirements = ['pyarrow','scikit-learn==0.22.1','numpy','joblib'], #or pass in the path to requirements.txt
                  instance_type = 'local',
                  autoscale = True,
                  #framework = 'sklearn', # not required if you provide requirements
                  wait = True)

## Test containerized version locally

In [None]:
# Send the last training row (8 float64 features) as raw bytes
out = ez.predictor.predict(X[-1:].tobytes()).decode()
out

In [None]:
# WARNING: `docker container ls -aq` lists every container on this host, so
# this stops all of them, not only the local inference container.
# Standard output is discarded.
!docker container stop $(docker container ls -aq) >/dev/null

## Deploy on SageMaker

In [None]:
# Package the artifact that modelscript_sklearn.load_model actually reads
# ('ensemble.joblib'); it is listed twice only to demonstrate a multi-model endpoint.
ezonsm = ezsmdeploy.Deploy(model = ['ensemble.joblib','ensemble.joblib'], # example of multimodel endpoint. 
                  script = 'modelscript_sklearn.py',
                  requirements = ['pyarrow','scikit-learn==0.22.1','numpy','joblib'])

In [None]:
#!./src/build-docker.sh test

In [None]:
# Send the last training row (8 float64 features) as raw bytes to the second model
out = ezonsm.predictor.predict(X[-1:].tobytes(),target_model='model1.tar.gz').decode() 
out

### Install the additional locust testing functionality to enable automated testing

In [None]:
# Quote the requirement so the shell cannot treat the [locust] extra as a glob pattern
!pip install "ezsmdeploy[locust]"

In [None]:
# Load-test the endpoint with the last training row (8 float64 features) as the payload
ezonsm.test(input_data=X[-1:].tobytes(), target_model='model1.tar.gz',usercount=20,hatchrate=10,timeoutsecs=10)

In [None]:
import pandas as pd

# Display the statistics CSV under src/
# (presumably written by the ezonsm.test run above; confirm)
locust_stats = pd.read_csv('src/locuststats_stats.csv')
locust_stats

In [None]:
# Clean up: delete the endpoint created by the SageMaker deployment above
ezonsm.predictor.delete_endpoint()