In [7]:
import os

# Saved BentoML bundle that the rest of the notebook serves and dockerizes.
# Set the BENTOML_BUNDLE_PATH environment variable to use a bundle located
# elsewhere; the fallback is the path this notebook was originally run with.
bentoml_bundle_path = os.environ.get(
    'BENTOML_BUNDLE_PATH',
    '/home/bentoml/bentoml/repository/Service/20200424104819_1D90A9',
)

In [8]:
from bentoml.utils import detect_free_port
# Pick a port with BentoML's detect_free_port helper and derive the local
# predict endpoint from it; both values are echoed for reference.
PORT = detect_free_port()
server_url = "http://127.0.0.1:{}/predict".format(PORT)
print(PORT, server_url, sep="\n")

43667
http://127.0.0.1:43667/predict


# Serve BentoML Server directly

In [9]:
# Only print the serve command (it is not executed by this cell) so it can be
# copied and run elsewhere, e.g. in a separate terminal.
serve_command = (
    f"!bentoml serve-gunicorn {bentoml_bundle_path} "
    f"--port {PORT} --enable-microbatch --workers 1"
)
print(serve_command)
# Runnable form of the same command, left disabled:
#!bentoml serve-gunicorn {bentoml_bundle_path} --port {PORT} --enable-microbatch --workers 1

!bentoml serve-gunicorn /home/bentoml/bentoml/repository/Service/20200424104819_1D90A9 --port 43667 --enable-microbatch --workers 1


# Serve with docker

In [14]:
!cd {bentoml_bundle_path}
IMG_NAME = bentoml_bundle_path.split('/')[-1].lower()
!docker build -t {IMG_NAME} {bentoml_bundle_path}

Sending build context to Docker daemon  457.3MB
Step 1/15 : FROM continuumio/miniconda3:4.7.12
 ---> 406f2b43ea59
Step 2/15 : ENTRYPOINT [ "/bin/bash", "-c" ]
 ---> Using cache
 ---> 72ac38cf396d
Step 3/15 : EXPOSE 5000
 ---> Using cache
 ---> 8475dc08cadd
Step 4/15 : RUN set -x      && apt-get update      && apt-get install --no-install-recommends --no-install-suggests -y libpq-dev build-essential      && rm -rf /var/lib/apt/lists/*
 ---> Using cache
 ---> 0fc16fc9a6ea
Step 5/15 : RUN conda install pip numpy scipy       && pip install gunicorn
 ---> Using cache
 ---> b2dc0adaeadb
Step 6/15 : COPY . /bento
 ---> 6e20f1185529
Step 7/15 : WORKDIR /bento
 ---> Running in 3ce780a7bebb
Removing intermediate container 3ce780a7bebb
 ---> d862f36ec084
Step 8/15 : RUN if [ -f /bento/setup.sh ]; then /bin/bash -c /bento/setup.sh; fi
 ---> Running in acddeeb555df
Removing intermediate container acddeeb555df
 ---> f62df9b20005
Step 9/15 : RUN conda env update -n base -f /bento/environment.yml
 ---

In [15]:
# Start the freshly built image in detached mode, publishing container port
# 5000 on the host port stored in PORT.
# FLAGS is handed to the container as an environment variable; presumably the
# image's entrypoint forwards it to the BentoML server -- TODO confirm.
!docker run -itd -p {PORT}:5000 -e FLAGS="--workers 1 --enable-microbatch" {IMG_NAME}:latest

be8ac85715ec631e0d275b247e656a2ba820fba4044fb55e0a019b19a27bc0c9


# Test Predict API

## Prepare request

In [10]:
import random
import json
import pandas as pd
def get_request_producer():
    """Create a callable that builds requests for the predict endpoint.

    The endpoint is taken from the notebook-level ``server_url`` at the moment
    this factory is called.

    Returns:
        A function ``_gen_data(size=1)`` returning the tuple
        ``(url, method, headers, data)``, where ``data`` is the JSON form of a
        one-column DataFrame holding ``size`` copies of the same sentence.
    """
    base_sentence = "The acting was a bit lacking."
    endpoint = server_url
    http_verb = "POST"
    # Created once and shared by every request this producer hands out.
    json_headers = {"content-type": "application/json"}

    def _gen_data(size=1):
        # A single random suffix is drawn per request, so all rows of one
        # batch carry the same text while separate requests differ.
        suffix = f" By User:{random.random()} fine"
        sentence = base_sentence + suffix
        rows = [sentence for _ in range(size)]
        body = pd.DataFrame(rows).to_json()
        return endpoint, http_verb, json_headers, body

    return _gen_data

# Smoke test: build a producer and show one generated request with two rows.
sample_request = get_request_producer()
sample_request(2)

('http://127.0.0.1:43667/predict',
 'POST',
 {'content-type': 'application/json'},
 '{"0":{"0":"The acting was a bit lacking. By User:0.47683408042242015 fine","1":"The acting was a bit lacking. By User:0.47683408042242015 fine"}}')

## Request with Python requests

In [16]:
%%time
import requests
gen = get_request_producer()
url, me, h, d = gen(5)
r = requests.request(me, url, headers=h, data=d)
print(r)
print(r.content)

<Response [200]>
b'["negative", "negative", "negative", "negative", "negative"]'
CPU times: user 9.54 ms, sys: 0 ns, total: 9.54 ms
Wall time: 2.1 s


from bentoml.utils.benchmark import BenchmarkClient
b = BenchmarkClient(get_request_producer(), lambda: 1, timeout=10)  # client built from a fresh request producer, with timeout=10
b.start_session(120, 10, 900)  # all-positional call; presumably (session length, total_user, spawn_speed) -- TODO confirm against BenchmarkClient.start_session

In [13]:
from bentoml.utils.benchmark import BenchmarkClient
# Load-test the predict endpoint: each simulated request is built by a fresh
# request producer, and the session is started with 50 users in total.
request_producer = get_request_producer()
b = BenchmarkClient(request_producer, lambda: 1, timeout=20)
b.start_session(120, total_user=50, spawn_speed=2)


╒═════════════╤═════════════╤══════════╤═════════════════╤═══════════════════╕
│ Reqs/Fail   │   Failure % │   Reqs/s │ Avg Resp Time   │   Client Health % │
╞═════════════╪═════════════╪══════════╪═════════════════╪═══════════════════╡
│ 0/0         │           0 │        0 │ None            │               100 │
╘═════════════╧═════════════╧══════════╧═════════════════╧═══════════════════╛

╒═════════════╤═════════════╤══════════╤═════════════════╤═══════════════════╕
│ Reqs/Fail   │   Failure % │   Reqs/s │   Avg Resp Time │   Client Health % │
╞═════════════╪═════════════╪══════════╪═════════════════╪═══════════════════╡
│ 1/0         │           0 │      0.5 │         1.44865 │               100 │
╘═════════════╧═════════════╧══════════╧═════════════════╧═══════════════════╛

╒═════════════╤═════════════╤══════════╤═════════════════╤═══════════════════╕
│ Reqs/Fail   │   Failure % │   Reqs/s │   Avg Resp Time │   Client Health % │
╞═════════════╪═════════════╪══════════╪═════════