# sklearn IRIS
![Impression](https://www.google-analytics.com/collect?v=1&tid=UA-112879361-3&cid=555&t=event&ec=benchmark&ea=bentoml-benchmark&dt=sklearn-iris-clipper)

In [1]:
# Reload edited modules automatically before each cell runs.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [1]:
import bentoml
# Base name shared by the generated service module ({NAME}.py, imported below
# as `sklearn_iris_clipper`) and the label written to the benchmark log.
NAME = 'sklearn_iris_clipper'

# Load data & train model

In [2]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn import datasets

# One seed for both the split and the forest, so that Restart & Run All
# reproduces the same predictions and MSE.
RANDOM_SEED = 7

iris = datasets.load_iris()
x = iris.data[:, 2:]  # keep only the feature columns from index 2 onward
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=RANDOM_SEED
)

# add parameters for tuning
num_estimators = 100

# train the model
model = RandomForestRegressor(n_estimators=num_estimators, random_state=RANDOM_SEED)
model.fit(X_train, y_train)
predictions = model.predict(X_test)
print('predictions: ', predictions)

# log model performance
mse = mean_squared_error(y_test, predictions)
print("  mse: %f" % mse)

predictions:  [2.   1.   0.   1.   1.66 0.   1.13 1.   0.   1.   2.   1.   0.   2.
 0.   1.87 2.   2.   0.   0.   1.   2.   1.   1.26 1.58 1.87 1.   1.
 2.   2.  ]
  mse: 0.087677


In [3]:
%%writefile {NAME}.py

import bentoml
from bentoml.artifact import SklearnModelArtifact
from bentoml.handlers import ClipperFloatsHandler


# Bento service exposing the scikit-learn model stored under the artifact
# name 'model' through a Clipper floats handler.
@bentoml.env()
@bentoml.artifacts([SklearnModelArtifact('model')])
class BentoSvc(bentoml.BentoService):

    # Forward the handler-provided inputs to the packed model and hand its
    # predictions straight back.
    @bentoml.api(ClipperFloatsHandler)
    def predict_clipper(self, inputs):
        return self.artifacts.model.predict(inputs)

Overwriting sklearn_iris_clipper.py


In [4]:
from sklearn_iris_clipper import BentoSvc

# Instantiate the service, attach the trained model under the artifact name
# "model", and save it; the value returned by save() is kept as `saved_path`
# for the registration and deployment cells.
bento_svc = BentoSvc()
bento_svc.pack("model", model)
saved_path = bento_svc.save()

running sdist
running egg_info
writing BentoML.egg-info/PKG-INFO
writing dependency_links to BentoML.egg-info/dependency_links.txt
writing entry points to BentoML.egg-info/entry_points.txt
writing requirements to BentoML.egg-info/requires.txt
writing top-level names to BentoML.egg-info/top_level.txt
reading manifest file 'BentoML.egg-info/SOURCES.txt'
reading manifest template 'MANIFEST.in'


no previously-included directories found matching 'e2e_tests'
no previously-included directories found matching 'tests'
no previously-included directories found matching 'benchmark'


writing manifest file 'BentoML.egg-info/SOURCES.txt'
running check
creating BentoML-0.4.9+228.g3fbe5b9.dirty
creating BentoML-0.4.9+228.g3fbe5b9.dirty/BentoML.egg-info
creating BentoML-0.4.9+228.g3fbe5b9.dirty/bentoml
creating BentoML-0.4.9+228.g3fbe5b9.dirty/bentoml/artifact
creating BentoML-0.4.9+228.g3fbe5b9.dirty/bentoml/bundler
creating BentoML-0.4.9+228.g3fbe5b9.dirty/bentoml/cli
creating BentoML-0.4.9+228.g3fbe5b9.dirty/bentoml/clipper
creating BentoML-0.4.9+228.g3fbe5b9.dirty/bentoml/configuration
creating BentoML-0.4.9+228.g3fbe5b9.dirty/bentoml/configuration/__pycache__
creating BentoML-0.4.9+228.g3fbe5b9.dirty/bentoml/deployment
creating BentoML-0.4.9+228.g3fbe5b9.dirty/bentoml/deployment/aws_lambda
creating BentoML-0.4.9+228.g3fbe5b9.dirty/bentoml/deployment/sagemaker
creating BentoML-0.4.9+228.g3fbe5b9.dirty/bentoml/handlers
creating BentoML-0.4.9+228.g3fbe5b9.dirty/bentoml/marshal
creating BentoML-0.4.9+228.g3fbe5b9.dirty/bentoml/migrations
creating BentoML-0.4.9+228.g3fb

# Build & Run Bento Service in Docker

In [5]:
from clipper_admin import ClipperConnection, DockerContainerManager
from bentoml.clipper import deploy_bentoml
cl = ClipperConnection(DockerContainerManager())
try:
    cl.start_clipper(cache_size=1)  # limit cache size for comparison
except Exception as exc:
    # Catch Exception rather than using a bare `except:` so KeyboardInterrupt
    # and SystemExit still propagate, and report why the start failed instead
    # of swallowing it silently.
    # NOTE(review): presumably this happens when a cluster is already running,
    # in which case attaching to it is the intended fallback; confirm.
    print(f"start_clipper failed ({exc!r}); connecting to the existing cluster instead")
    cl.connect()

20-05-14:09:12:30 INFO     [docker_container_manager.py:184] [default-cluster] Starting managed Redis instance in Docker
20-05-14:09:12:33 INFO     [docker_container_manager.py:276] [default-cluster] Metric Configuration Saved at /tmp/tmp1s2ntzlo.yml
20-05-14:09:12:34 INFO     [clipper_admin.py:162] [default-cluster] Clipper is running


In [6]:
# Application name = last '/'-separated component of the saved path, lowercased.
APP_NAME = saved_path.split('/')[-1].lower()
# 'default_pred' appears to be the application's default output:
# verify_clipper_response() treats any response containing it as a failure.
# NOTE(review): 300000 is presumably the latency SLO in microseconds; confirm
# against the clipper_admin register_application documentation.
cl.register_application(APP_NAME, 'floats', 'default_pred', 300000)

20-05-14:09:12:34 INFO     [clipper_admin.py:236] [default-cluster] Application 20200514091210_5a9da3 was successfully registered


In [7]:
# Deploy the 'predict_clipper' API of the saved service through the Clipper
# connection; the returned model name is used below to set replicas and to
# link the model to the application.
clipper_model_name, clipper_model_version = deploy_bentoml(
    cl, saved_path, 'predict_clipper',
    build_envs=dict()
)

[2020-05-14 09:12:34,994] INFO - Step 1/12 : FROM clipper/python36-closure-container:0.4.1
[2020-05-14 09:12:34,995] INFO - 

[2020-05-14 09:12:34,996] INFO -  ---> e5b9dc250c05

[2020-05-14 09:12:34,998] INFO - Step 2/12 : COPY . /container
[2020-05-14 09:12:35,000] INFO - 

[2020-05-14 09:12:35,225] INFO -  ---> 9a0bdfc1988a

[2020-05-14 09:12:35,227] INFO - Step 3/12 : WORKDIR /container
[2020-05-14 09:12:35,228] INFO - 

[2020-05-14 09:12:35,283] INFO -  ---> Running in c110e0166657

[2020-05-14 09:12:35,411] INFO -  ---> 2f8e1d28eaa3

[2020-05-14 09:12:35,414] INFO - Step 4/12 : ENV PIP_INDEX_URL=
[2020-05-14 09:12:35,415] INFO - 

[2020-05-14 09:12:35,471] INFO -  ---> Running in 1e2937cae546

[2020-05-14 09:12:35,583] INFO -  ---> d6f4b3e73813

[2020-05-14 09:12:35,585] INFO - Step 5/12 : ENV PIP_TRUSTED_HOST=
[2020-05-14 09:12:35,586] INFO - 

[2020-05-14 09:12:35,634] INFO -  ---> Running in 7ffe390af7c4

[2020-05-14 09:12:35,748] INFO -  ---> 9c4a587d5b61

[2020-05-14 09:12:3

20-05-14:09:13:27 INFO     [docker_container_manager.py:409] [default-cluster] Found 0 replicas for bentosvc-predict-clipper:20200514091210-5a9da3. Adding 1
20-05-14:09:13:28 INFO     [clipper_admin.py:724] [default-cluster] Successfully registered model bentosvc-predict-clipper:20200514091210-5a9da3
20-05-14:09:13:28 INFO     [clipper_admin.py:642] [default-cluster] Done deploying model bentosvc-predict-clipper:20200514091210-5a9da3.




In [8]:
cl.set_num_replicas(clipper_model_name, 1)  # limit workers for comparison
# Attach the deployed model to the registered application.
cl.link_model_to_app(APP_NAME, clipper_model_name)
# Prediction endpoint used by the request test and the benchmark below.
addr = cl.get_query_addr()
server_url = f"http://{addr}/{APP_NAME}/predict"

20-05-14:09:13:29 INFO     [clipper_admin.py:303] [default-cluster] Model bentosvc-predict-clipper is now linked to application 20200514091210_5a9da3


In [9]:
server_url

'http://localhost:1337/20200514091210_5a9da3/predict'

# Test with requests

In [14]:
import json
import requests

from sklearn import datasets
from sklearn.model_selection import train_test_split

# Rebuild the same split as the training cell (same test_size and
# random_state) and send its first test sample as a smoke test.
iris = datasets.load_iris()
x = iris.data[:, 2:]
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=7)

headers = {"content-type": "application/json"}
data = json.dumps(
       {"input": X_test[0].tolist()}
)

# Bound the wait: without a timeout, requests blocks indefinitely when the
# query frontend never answers, which would hang the whole notebook run.
json_response = requests.post(server_url, data=data, headers=headers, timeout=10)
print(json_response)
print(json_response.text)

<Response [200]>
{"query_id":1,"output":2.0,"default":false}


# Benchmark

In [15]:
import pandas as pd
import json
import copy
import random


def get_request_producer():
    """Build a zero-argument callable that yields one benchmark request.

    The base payload is the first test sample of the iris split (feature
    columns from index 2 onward, ``test_size=0.1``, ``random_state=7``).

    Returns:
        A function returning ``(url, method, headers, data)``, where ``data``
        is the JSON body ``{"input": [...]}`` with ``random.random() / 10000``
        added to each of the first two values.
    """
    from sklearn import datasets
    from sklearn.model_selection import train_test_split

    iris = datasets.load_iris()
    features = iris.data[:, 2:]
    labels = iris.target

    url = server_url
    method = "POST"
    headers = {"content-type": "application/json"}
    _, X_test, _, _ = train_test_split(features, labels, test_size=0.1, random_state=7)
    base_data = X_test[0].tolist()

    def _produce_request():
        # NOTE(review): the jitter presumably makes every payload unique so
        # responses are not served from Clipper's prediction cache; confirm.
        jittered = copy.deepcopy(base_data)
        for position in (0, 1):
            jittered[position] += random.random() / 10000
        body = json.dumps({"input": jittered})
        return url, method, headers, body

    return _produce_request

get_request_producer()()

('http://localhost:1337/20200514091210_5a9da3/predict',
 'POST',
 {'content-type': 'application/json'},
 '{"input": [5.100004337474625, 1.80001675040652]}')

In [16]:
# When Clipper reaches its limit it responds with the default value instead of
# a real prediction, so such responses must not be counted as successes.
def verify_clipper_response(status, msg):
    """Return True only for a 2xx status whose body lacks "default_pred"."""
    is_2xx = status // 100 == 2
    return is_2xx and "default_pred" not in msg

In [17]:
import os


def init_file(file_name):
    """Create ``file_name`` with the CSV header row unless it already exists.

    Args:
        file_name: Path of the benchmark result file to initialise.
    """
    # Check the path we were asked to initialise rather than the global
    # LOG_FILE, so the helper behaves correctly for any result file and does
    # not depend on a name defined later in the notebook.
    if os.path.exists(file_name):
        return
    with open(file_name, "a") as lf:
        lf.write('"model name","test_users","total succ","succ/sec","avg resp time",'
                 '"total fail","fail/sec","avg fail resp time"')
        lf.write('\n')


def log_result(b, name, file_name, test_user):
    """Append one comma-separated result row for a finished benchmark run.

    Args:
        b: Benchmark client; its ``stat`` attribute supplies the counters.
        name: Label written in the first column.
        file_name: Result file; ``init_file`` is called on it first.
        test_user: Value written in the second column.
    """
    init_file(file_name)
    stat = b.stat

    # Every denominator is clamped to 1 so an empty run cannot divide by zero.
    # NOTE(review): this row has nine values, while the header written by
    # init_file names only eight columns (the last value has no header).
    row = [
        name,
        test_user,
        stat.success,
        stat.success / max(stat.sess_time, 1),
        sum(stat.succ_times) / max(stat.success, 1),
        stat.fail,
        stat.fail / max(stat.sess_time, 1),
        sum(stat.exec_times) / max(stat.fail, 1),
        (1 - stat.client_busy / max(stat.req_total, 1)),
    ]

    line = ','.join(map(str, row))
    with open(file_name, "a") as lf:
        lf.write(line)
        lf.write('\n')

        
LOG_FILE = 'benchmark_result.csv'

In [18]:
import asyncio
from bentoml.utils.benchmark import BenchmarkClient

# Run one benchmark session per user count (100, 200, ..., 1500) and append
# each run's statistics to LOG_FILE.
for TEST_USER in range(100, 1600, 100):
    print(TEST_USER)
    # NOTE(review): timeout=0.32 is presumably chosen just above the 300000
    # value registered for the application; confirm the units match.
    b = BenchmarkClient(get_request_producer(), lambda: 1, timeout=0.32,
                        verify_response=verify_clipper_response)
    # NOTE(review): the arguments are presumably (duration, users, spawn
    # rate); confirm against BenchmarkClient._start_session.
    await b._start_session(60, TEST_USER, TEST_USER)
    log_result(b, f'{NAME}', LOG_FILE, TEST_USER)
    await asyncio.sleep(15)  # pause before starting the next run

100

╒══════════╤═════════╤══════════╤═════════════════╤═══════════════════╕
│ Result   │   Total │   Reqs/s │   Resp Time Avg │ Client Health %   │
╞══════════╪═════════╪══════════╪═════════════════╪═══════════════════╡
│ succ     │       0 │        0 │             nan │ 100.0             │
├──────────┼─────────┼──────────┼─────────────────┼───────────────────┤
│ fail     │       0 │        0 │             nan │                   │
╘══════════╧═════════╧══════════╧═════════════════╧═══════════════════╛
------ 100 users spawned ------

╒══════════╤═════════╤══════════╤═════════════════╤═══════════════════╕
│ Result   │   Total │   Reqs/s │   Resp Time Avg │ Client Health %   │
╞══════════╪═════════╪══════════╪═════════════════╪═══════════════════╡
│ succ     │     171 │       83 │       0.0607761 │ 100.0             │
├──────────┼─────────┼──────────┼─────────────────┼───────────────────┤
│ fail     │       0 │        0 │     nan         │                   │
╘══════════╧═════════╧════