In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

# Put the active environment's executable directory on PATH so that `!cmd`
# shell escapes (bentoml, locust, ...) find the tools installed next to the kernel.
import sys, os
# sys.prefix is the environment the kernel runs in. sys.base_prefix is the
# interpreter a venv was created from, whose bin directory does not contain
# the packages installed into the venv.
_env_bin = os.path.join(sys.prefix, 'Scripts' if os.name == 'nt' else 'bin')
_current_path = os.environ.get('PATH', '')
# Compare against whole PATH entries; a substring test can match unrelated paths.
if _env_bin not in _current_path.split(os.pathsep):
    os.environ['PATH'] = os.pathsep.join(filter(None, [_env_bin, _current_path]))

In [2]:
import bentoml
# Base name of the generated files: the service module is written to
# `{NAME}.py` and the locust script to `benchmark_{NAME}.py`.
# NOTE: the packing cell imports `linear_sleep` by its literal name, so
# changing NAME also requires updating that import.
NAME = 'linear_sleep'

# Load data & train model (not used by the service, just a placeholder)

In [3]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn import datasets

# Single seed for both the split and the forest, so that Restart & Run All
# reproduces the same model and the same printed metrics.
SEED = 7

# The model only exists so there is an artifact to pack into the service;
# the service's `predict` never calls it.
iris = datasets.load_iris()
x = iris.data[:, 2:]  # keep the feature columns from index 2 onward
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=SEED
)

# add parameters for tuning
num_estimators = 100

# train the model
model = RandomForestRegressor(n_estimators=num_estimators, random_state=SEED)
model.fit(X_train, y_train)
predictions = model.predict(X_test)
print('predictions: ', predictions)

# log model performance
mse = mean_squared_error(y_test, predictions)
print("  mse: %f" % mse)

predictions:  [2.   1.   0.   1.   1.59 0.   1.23 1.   0.   1.   2.   1.   0.   2.
 0.   1.88 2.   2.   0.   0.   1.   2.   1.   1.38 1.52 1.88 1.   1.
 2.   2.  ]
  mse: 0.087487


In [4]:
%%writefile {NAME}.py

import bentoml
import time
from bentoml.artifact import SklearnModelArtifact
from bentoml.handlers import DataframeHandler


@bentoml.env()
@bentoml.artifacts([SklearnModelArtifact('model')])
class BentoSvc(bentoml.BentoService):
    """Dummy service whose response time grows linearly with the batch size.

    A 'model' artifact is declared so a model can be packed, but `predict`
    never uses it.
    """

    @bentoml.api(DataframeHandler)
    def predict(self, inputs):
        """Sleep for ``a * n_rows + b`` seconds, then return `inputs` unchanged.

        ``a`` and ``b`` are read from the first row of `inputs`, so that row
        must hold exactly two values; ``n_rows`` is ``inputs.shape[0]``.
        The coefficients of any further rows are ignored.
        """
        first_row = inputs.to_numpy()[0]
        per_row_cost, fixed_cost = first_row
        batch_size = inputs.shape[0]
        delay = per_row_cost * batch_size + fixed_cost
        time.sleep(delay)
        return inputs

Overwriting linear_sleep.py


In [5]:
from linear_sleep import BentoSvc

# Instantiate the service class defined in linear_sleep.py.
bento_svc = BentoSvc()
# Attach the trained model under "model", the artifact name declared on the class.
bento_svc.pack("model", model)
# Save the bundle; `saved_path` is what the serve and docker cells point at.
saved_path = bento_svc.save()

running sdist
running egg_info
writing BentoML.egg-info/PKG-INFO
writing dependency_links to BentoML.egg-info/dependency_links.txt
writing entry points to BentoML.egg-info/entry_points.txt
writing requirements to BentoML.egg-info/requires.txt
writing top-level names to BentoML.egg-info/top_level.txt
reading manifest file 'BentoML.egg-info/SOURCES.txt'
reading manifest template 'MANIFEST.in'


no previously-included directories found matching 'e2e_tests'
no previously-included directories found matching 'tests'


writing manifest file 'BentoML.egg-info/SOURCES.txt'
running check
creating BentoML-0.5.2+144.g2865d83
creating BentoML-0.5.2+144.g2865d83/BentoML.egg-info
creating BentoML-0.5.2+144.g2865d83/bentoml
creating BentoML-0.5.2+144.g2865d83/bentoml/artifact
creating BentoML-0.5.2+144.g2865d83/bentoml/bundler
creating BentoML-0.5.2+144.g2865d83/bentoml/cli
creating BentoML-0.5.2+144.g2865d83/bentoml/clipper
creating BentoML-0.5.2+144.g2865d83/bentoml/configuration
creating BentoML-0.5.2+144.g2865d83/bentoml/configuration/__pycache__
creating BentoML-0.5.2+144.g2865d83/bentoml/deployment
creating BentoML-0.5.2+144.g2865d83/bentoml/deployment/aws_lambda
creating BentoML-0.5.2+144.g2865d83/bentoml/deployment/sagemaker
creating BentoML-0.5.2+144.g2865d83/bentoml/handlers
creating BentoML-0.5.2+144.g2865d83/bentoml/marshal
creating BentoML-0.5.2+144.g2865d83/bentoml/migrations
creating BentoML-0.5.2+144.g2865d83/bentoml/migrations/__pycache__
creating BentoML-0.5.2+144.g2865d83/bentoml/migrations

In [6]:
from bentoml.utils import detect_free_port
# Port shared by the serve/docker cells, the requests test and the locust run.
# NOTE(review): presumably an unused local port at the time of the call — confirm
# against bentoml.utils.detect_free_port.
PORT = detect_free_port()
print(PORT)

52565


In [7]:
print(f"bentoml serve-gunicorn {saved_path} --port {PORT} --workers 1 --enable-microbatch")
!bentoml serve-gunicorn {saved_path} --port {PORT} --workers 1 --enable-microbatch

bentoml serve-gunicorn /home/bentoml/bentoml/repository/BentoSvc/20200324222939_C004B4 --port 52565 --workers 1 --enable-microbatch
[2020-03-24 22:29:52,516] INFO - Running micro batch service on :52565
[2020-03-24 22:29:53 +0800] [1965917] [INFO] Starting gunicorn 20.0.4
[2020-03-24 22:29:53 +0800] [1965917] [INFO] Listening at: http://0.0.0.0:59881 (1965917)
[2020-03-24 22:29:53 +0800] [1965917] [INFO] Using worker: sync
[2020-03-24 22:29:53 +0800] [1966251] [INFO] Booting worker with pid: 1966251
[2020-03-24 22:29:53 +0800] [1966051] [INFO] Starting gunicorn 20.0.4
[2020-03-24 22:29:53 +0800] [1966051] [INFO] Listening at: http://0.0.0.0:52565 (1966051)
[2020-03-24 22:29:53 +0800] [1966051] [INFO] Using worker: aiohttp.worker.GunicornWebWorker
[2020-03-24 22:29:53 +0800] [1966252] [INFO] Booting worker with pid: 1966252
[2020-03-24 22:29:53,913] INFO - Micro batch enabled for API `predict`
[2020-03-24 22:29:53,914] INFO - Your system nofile limit is 1024, which means each instance o

# Build & Run Bento Service in Docker

In [None]:
!cd {saved_path}
IMG_NAME = saved_path.split('/')[-1].lower()
!docker build -t {IMG_NAME} \
    --build-arg PIP_TRUSTED_HOST=192.168.138.2 \  # set your prefer PYPI mirror
    --build-arg PIP_INDEX_URL=http://192.168.138.2/simple \
    {saved_path}
!docker run -itd -p {PORT}:5000 --cpus 1 -e FLAGS="--workers 1 --enable-microbatch" {IMG_NAME}:latest

# Test with requests

In [None]:
import json
import requests
import pandas as pd


# Smoke test: POST one row [a, b] = [0.0, 2.0] as DataFrame JSON to /predict
# and show both the raw response object and its decoded JSON body.
# NOTE(review): for a lone one-row request the service should sleep about
# a * 1 + b = 2 seconds before answering — confirm when micro-batching is on.
headers = {"content-type": "application/json"}
data = pd.DataFrame([[0.0, 2.0]]).to_json()
print(data)

predict_url = f'http://127.0.0.1:{PORT}/predict'
json_response = requests.post(predict_url, data=data, headers=headers)
print(json_response)
print(json_response.json())

# Benchmark with locust

In [None]:
%%writefile benchmark_{NAME}.py
from locust import HttpLocust, TaskSet, task, constant
from functools import lru_cache

import pandas as pd
import json
import random
import os

# Benchmark knobs, read once from the environment when this file is imported.
A = float(os.environ.get('A', 0))        # base value of the first payload column
B = float(os.environ.get('B', 0))        # base value of the second payload column
WAIT = float(os.environ.get('WAIT', 1))  # handed to locust's constant() wait time


@lru_cache(maxsize=1)
def data_producer():
    """Return the request generator, building it only on the first call.

    The generator takes no arguments and returns ``(headers, body)``:
    `headers` is the same JSON content-type dict every time, and `body` is a
    one-row DataFrame serialised with ``to_json()`` whose two values are A and
    B plus a small random jitter.
    """
    json_headers = {"content-type": "application/json"}

    def make_request():
        # Jitter is drawn for A first and then for B.
        jittered_a = A + random.random() / 100000.0
        jittered_b = B + random.random() / 10000.0
        payload = pd.DataFrame([[jittered_a, jittered_b]]).to_json()
        return json_headers, payload

    return make_request


class WebsiteTasks(TaskSet):
    """Locust task set with a single task that POSTs a generated row to /predict."""

    @task
    def index(self):
        """Send one prediction request with a freshly generated payload."""
        make_request = data_producer()
        request_headers, payload = make_request()
        self.client.post("/predict", payload, headers=request_headers)

class WebsiteUser(HttpLocust):
    """Simulated HTTP user that runs `WebsiteTasks` with a constant wait time."""
    # Task set executed by every simulated user.
    task_set = WebsiteTasks
    # Fixed pause between tasks; WAIT is read from the WAIT environment variable.
    wait_time = constant(WAIT)

In [None]:
print(f"locust -f benchmark_{NAME}.py -H http://127.0.0.1:{PORT} --port 8090")
!locust -f benchmark_{NAME}.py -H http://127.0.0.1:{PORT} --port 8090