# sklearn IRIS

In [3]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

# Make executables that live next to the Python installation resolvable from
# `!shell` commands by prepending its bin directory to PATH.
# NOTE(review): sys.base_prefix is the *base* installation; inside a virtualenv
# the environment itself is sys.prefix — confirm which one is intended here.
import sys, os

_bin_dir = os.path.join(sys.base_prefix, 'bin')
_current_path = os.environ.get('PATH', '')
# Compare against real PATH entries: a substring test would wrongly skip the
# update when the prefix merely appears inside some other entry.
if _bin_dir not in _current_path.split(os.pathsep):
    os.environ['PATH'] = (
        f"{_bin_dir}{os.pathsep}{_current_path}" if _current_path else _bin_dir
    )

In [4]:
import bentoml
# Base name for the files this notebook generates: the service module is
# written to `{NAME}.py` and the locust script to `benchmark_{NAME}.py`.
# The later `from sklearn_iris_clipper import BentoSvc` hard-codes this value,
# so keep the two in sync when renaming.
NAME = 'sklearn_iris_clipper'

# Load data & train model

In [5]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn import datasets

# Load iris and keep only the feature columns from index 2 onward.
iris = datasets.load_iris()
x = iris.data[:, 2:]
y = iris.target
# Fixed seed so the held-out rows are the same ones the test and benchmark
# cells recreate later with the identical call.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=7)

# add parameters for tuning
num_estimators = 100

# train the model
# The forest is seeded as well; without it the predictions and mse printed
# below differ on every Restart & Run All.
# NOTE(review): iris.target holds class labels but a regressor is fitted, so
# predictions can be fractional (e.g. 1.68 in the recorded output) — confirm
# this is intended for the benchmark.
model = RandomForestRegressor(n_estimators=num_estimators, random_state=7)
model.fit(X_train, y_train)
predictions = model.predict(X_test)
print('predictions: ', predictions)

# log model performance
mse = mean_squared_error(y_test, predictions)
print("  mse: %f" % mse)

predictions:  [2.   1.   0.   1.   1.68 0.   1.19 1.   0.   1.   2.   1.   0.   2.
 0.   1.94 2.   2.   0.   0.   1.   2.   1.   1.32 1.56 1.94 1.   1.
 2.   2.  ]
  mse: 0.089390


In [6]:
%%writefile {NAME}.py

import bentoml
from bentoml.artifact import SklearnModelArtifact
from bentoml.handlers import ClipperFloatsHandler


@bentoml.env()
@bentoml.artifacts([SklearnModelArtifact('model')])
class BentoSvc(bentoml.BentoService):
    """BentoService exposing the packed sklearn model through a Clipper floats handler."""

    @bentoml.api(ClipperFloatsHandler)
    def predict_clipper(self, inputs):
        """Return the packed `model` artifact's predictions for `inputs`."""
        packed_model = self.artifacts.model
        return packed_model.predict(inputs)

Writing sklearn_iris_clipper.py


In [7]:
# Import the service class from the module written by the previous cell.
from sklearn_iris_clipper import BentoSvc

# Attach the trained estimator under the artifact name declared on the
# service ("model"), then save the bundle.
bento_svc = BentoSvc()
bento_svc.pack("model", model)
# saved_path is reused below: its last path component becomes the Clipper
# application name and the path itself is handed to deploy_bentoml.
saved_path = bento_svc.save()

running sdist
running egg_info
writing BentoML.egg-info/PKG-INFO
writing dependency_links to BentoML.egg-info/dependency_links.txt
writing entry points to BentoML.egg-info/entry_points.txt
writing requirements to BentoML.egg-info/requires.txt
writing top-level names to BentoML.egg-info/top_level.txt
reading manifest file 'BentoML.egg-info/SOURCES.txt'
reading manifest template 'MANIFEST.in'


no previously-included directories found matching 'e2e_tests'
no previously-included directories found matching 'tests'


writing manifest file 'BentoML.egg-info/SOURCES.txt'
running check
creating BentoML-0.5.2+137.gcbdaab7.dirty
creating BentoML-0.5.2+137.gcbdaab7.dirty/BentoML.egg-info
creating BentoML-0.5.2+137.gcbdaab7.dirty/bentoml
creating BentoML-0.5.2+137.gcbdaab7.dirty/bentoml/artifact
creating BentoML-0.5.2+137.gcbdaab7.dirty/bentoml/bundler
creating BentoML-0.5.2+137.gcbdaab7.dirty/bentoml/cli
creating BentoML-0.5.2+137.gcbdaab7.dirty/bentoml/clipper
creating BentoML-0.5.2+137.gcbdaab7.dirty/bentoml/configuration
creating BentoML-0.5.2+137.gcbdaab7.dirty/bentoml/configuration/__pycache__
creating BentoML-0.5.2+137.gcbdaab7.dirty/bentoml/deployment
creating BentoML-0.5.2+137.gcbdaab7.dirty/bentoml/deployment/aws_lambda
creating BentoML-0.5.2+137.gcbdaab7.dirty/bentoml/deployment/sagemaker
creating BentoML-0.5.2+137.gcbdaab7.dirty/bentoml/handlers
creating BentoML-0.5.2+137.gcbdaab7.dirty/bentoml/marshal
creating BentoML-0.5.2+137.gcbdaab7.dirty/bentoml/migrations
creating BentoML-0.5.2+137.gcbd

In [8]:
from bentoml.utils import detect_free_port
# Ask BentoML for a currently unused port and show it.
# NOTE(review): PORT is not referenced by any later cell visible in this
# notebook — confirm whether it is still needed.
PORT = detect_free_port()
print(PORT)

57003


# Build & Run Bento Service in Docker

In [9]:
from clipper_admin import ClipperConnection, DockerContainerManager
from bentoml.clipper import deploy_bentoml

# Start a Clipper cluster on the local Docker daemon, or attach to one that is
# already running.
cl = ClipperConnection(DockerContainerManager())
try:
    cl.start_clipper()
except Exception:
    # Starting failed — presumably because a cluster already exists — so fall
    # back to connecting. `Exception` (not a bare `except`) keeps
    # KeyboardInterrupt / SystemExit propagating instead of being swallowed.
    cl.connect()

# Application name is the lower-cased last component of the saved bundle path.
APP_NAME = saved_path.split('/')[-1].lower()
# Latency objective passed to Clipper for this application, in microseconds.
SLO_MICROS = 300000
cl.register_application(APP_NAME, 'floats', 'default_pred', SLO_MICROS)

20-03-19:17:38:27 INFO     [clipper_admin.py:172] [default-cluster] Successfully connected to Clipper cluster at localhost:1337
20-03-19:17:38:27 INFO     [clipper_admin.py:236] [default-cluster] Application 20200319171544_8c9b8f was successfully registered


In [10]:
# Environment variables injected into the model image build so pip resolves
# packages from a private index.
# NOTE(review): the address below is specific to the author's network — adjust
# before running elsewhere.
build_envs = {
    "PIP_INDEX_URL": "http://192.168.138.2/simple",
    "PIP_TRUSTED_HOST": "192.168.138.2",
}

# Build the container for the saved bundle's `predict_clipper` API and deploy
# it to the connected Clipper cluster.
clipper_model_name, clipper_model_version = deploy_bentoml(
    cl,
    saved_path,
    'predict_clipper',
    build_envs=build_envs,
)


[2020-03-19 17:38:37,677] INFO - Step 1/12 : FROM clipper/python36-closure-container:0.4.1
[2020-03-19 17:38:37,679] INFO - 

[2020-03-19 17:38:37,690] INFO -  ---> e5b9dc250c05

[2020-03-19 17:38:37,691] INFO - Step 2/12 : COPY . /container
[2020-03-19 17:38:37,693] INFO - 

[2020-03-19 17:38:37,943] INFO -  ---> 747f7c52629b

[2020-03-19 17:38:37,959] INFO - Step 3/12 : WORKDIR /container
[2020-03-19 17:38:37,960] INFO - 

[2020-03-19 17:38:38,033] INFO -  ---> Running in b627d9b3af6d

[2020-03-19 17:38:38,148] INFO -  ---> f1783e8d17c1

[2020-03-19 17:38:38,150] INFO - Step 4/12 : ENV PIP_INDEX_URL=http://192.168.138.2/simple
[2020-03-19 17:38:38,151] INFO - 

[2020-03-19 17:38:38,198] INFO -  ---> Running in e25bca987c2e

[2020-03-19 17:38:38,300] INFO -  ---> 2e731c48546b

[2020-03-19 17:38:38,301] INFO - Step 5/12 : ENV PIP_TRUSTED_HOST=192.168.138.2
[2020-03-19 17:38:38,301] INFO - 

[2020-03-19 17:38:38,354] INFO -  ---> Running in fb94ef081553

[2020-03-19 17:38:38,464] INFO -

20-03-19:17:39:41 INFO     [docker_container_manager.py:409] [default-cluster] Found 0 replicas for bentosvc-predict-clipper:20200319171544-8c9b8f. Adding 1
20-03-19:17:39:42 INFO     [clipper_admin.py:724] [default-cluster] Successfully registered model bentosvc-predict-clipper:20200319171544-8c9b8f
20-03-19:17:39:42 INFO     [clipper_admin.py:642] [default-cluster] Done deploying model bentosvc-predict-clipper:20200319171544-8c9b8f.




In [11]:
# Route the registered application to the model that was just deployed.
cl.link_model_to_app(APP_NAME, clipper_model_name)
# Build the prediction URL from the cluster's query address and the app name.
addr = cl.get_query_addr()
clipper_url = f"http://{addr}/{APP_NAME}/predict"
clipper_url

20-03-19:17:39:57 INFO     [clipper_admin.py:303] [default-cluster] Model bentosvc-predict-clipper is now linked to application 20200319171544_8c9b8f


In [19]:
# Display the Clipper prediction endpoint computed in the previous cell.
clipper_url

'http://localhost:1337/20200319171544_8c9b8f/predict'

# Test with requests

In [18]:
import json
import requests

from sklearn import datasets
from sklearn.model_selection import train_test_split

# Recreate the same held-out split used for training (same slicing and seed)
# so this cell can run on its own.
iris = datasets.load_iris()
x = iris.data[:, 2:]
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=7)

# Request body: the first held-out sample under the "input" key.
headers = {"content-type": "application/json"}
data = json.dumps(
       {"input": X_test[0].tolist()}
)

# Bound the wait: without a timeout requests blocks indefinitely if the
# endpoint never answers.
REQUEST_TIMEOUT_S = 10
json_response = requests.post(
    clipper_url, data=data, headers=headers, timeout=REQUEST_TIMEOUT_S
)
print(json_response)
print(json_response.text)

<Response [200]>
{"query_id":3,"output":2.0,"default":false}


# Benchmark with locust

In [20]:
%%writefile benchmark_{NAME}.py
from locust import HttpLocust, TaskSet, task, constant
from functools import lru_cache

import pandas as pd
import json


@lru_cache(maxsize=1)
def data_producer():
    """Build, once per process, a callable that yields the benchmark request.

    The iris data is loaded and split with the same slicing and seed as the
    notebook, and the first held-out sample is serialized to JSON a single
    time. ``lru_cache`` makes repeated calls return the same callable.

    Returns:
        A zero-argument function returning ``(headers, data)``, where
        ``headers`` is the JSON content-type header dict and ``data`` is the
        JSON-encoded request body.
    """
    # Imported lazily so the cost is paid only on the first call.
    from sklearn import datasets
    from sklearn.model_selection import train_test_split

    iris = datasets.load_iris()
    x = iris.data[:, 2:]
    y = iris.target
    # Only the held-out features are needed; the other split outputs are discarded.
    _, X_test, _, _ = train_test_split(x, y, test_size=0.2, random_state=7)

    headers = {"content-type": "application/json"}
    # The payload never changes, so encode it once here rather than on every
    # simulated request; this keeps client-side work out of the measurement.
    data = json.dumps(
           {"input": X_test[0].tolist()}
    )

    def _gen_data():
        return headers, data

    return _gen_data


class WebsiteTasks(TaskSet):
    """Task set with a single task that posts one prediction request."""

    @task
    def index(self):
        """POST the cached JSON payload to the host's base URL."""
        make_request = data_producer()
        request_headers, payload = make_request()
        self.client.post("", data=payload, headers=request_headers)

class WebsiteUser(HttpLocust):
    """Simulated user that runs `WebsiteTasks` with a constant wait of 1."""

    # Pause between consecutive tasks of one simulated user.
    wait_time = constant(1)
    # Tasks each simulated user executes.
    task_set = WebsiteTasks

Writing benchmark_sklearn_iris_clipper.py


In [None]:
# Launch locust with the generated benchmark script, using the Clipper
# prediction URL as the target host.
# NOTE(review): the blank execution count suggests this cell runs until it is
# interrupted — confirm before using Run All.
!locust -f benchmark_{NAME}.py -H {clipper_url}

[2020-03-19 17:44:40,310] beta/INFO/locust.main: Starting web monitor at http://*:8089
[2020-03-19 17:44:40,310] beta/INFO/locust.main: Starting Locust 0.14.4
[2020-03-19 17:44:51,641] beta/INFO/locust.runners: Hatching and swarming 1000 users at the rate 100 users/s (0 users already running)...
[2020-03-19 17:45:05,107] beta/INFO/locust.runners: All locusts hatched: WebsiteUser: 1000 (0 already running)
