# BentoML Example: Tensorflow 2.0 example (Echo model)

[BentoML](http://bentoml.ai) is an open source platform for machine learning model serving and deployment. 

This notebook demonstrates how to use BentoML to turn a TensorFlow model into a Docker image containing a REST API server serving this model, how to use your ML service built with BentoML as a CLI tool, and how to distribute it as a PyPI package.


In [1]:
# Reload edited modules automatically before each cell runs.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook.
%matplotlib inline

In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
import numpy as np
print(tf.__version__)

import os
import time
import requests
import json

2.0.0


In [4]:
class EchoModel(tf.keras.Model):
    """Minimal Keras model that echoes its input back to the caller."""

    def call(self, x):
        """Return `x` multiplied element-wise by 1, i.e. the same values."""
        echoed = tf.multiply(x, 1)
        return echoed

# Instantiate and compile the echo model.
custom_model = EchoModel()
custom_model.compile(
    optimizer='sgd',
    loss="mean_squared_error",
    metrics=['accuracy'],
)

# One all-zeros sample is used as both input and target for the fit below.
test_input = tf.constant(np.zeros([1, 2, 2]))
test_output = tf.constant(np.zeros([1, 2, 2]))

# Required: a single fit pass generates the model signature automatically.
custom_model.fit(test_input, test_output, epochs=1)

# Smoke test: the all-ones float32 input should come back unchanged.
custom_model(tf.constant(np.ones([4, 2, 3]), dtype=tf.float32))

Train on 1 samples


<tf.Tensor: id=192, shape=(4, 2, 3), dtype=float32, numpy=
array([[[1., 1., 1.],
        [1., 1., 1.]],

       [[1., 1., 1.],
        [1., 1., 1.]],

       [[1., 1., 1.],
        [1., 1., 1.]],

       [[1., 1., 1.],
        [1., 1., 1.]]], dtype=float32)>

In [6]:
# Call the model on an all-zeros float32 tensor built from a (2, 4, 1) array.
test_tensor = tf.constant(np.zeros([2,4,1]), dtype=tf.float32)
custom_model(test_tensor)

# Serve with BentoML
----

In [11]:
%%writefile tensorflow_echo.py

import bentoml
import tensorflow as tf
import numpy as np
from PIL import Image

from bentoml.artifact import (
    TensorflowSavedModelArtifact,
)
from bentoml.handlers import TensorflowTensorHandler


# NOTE(review): this constant is not referenced by the EchoServicer class in
# this module; it looks like a leftover from an image-classification example.
# Confirm nothing imports it before removing.
FASHION_MNIST_CLASSES = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']


# The declared pip dependencies mirror what this module imports at load time:
# PIL is imported at the top of the file, so Pillow must be installed wherever
# the bundle runs. scikit-learn was dropped because nothing here imports it.
@bentoml.env(pip_dependencies=['tensorflow', 'numpy', 'pillow'])
@bentoml.artifacts([TensorflowSavedModelArtifact('model')])
class EchoServicer(bentoml.BentoService):
    """BentoService exposing the packed 'model' artifact through a `predict` API."""

    @bentoml.api(TensorflowTensorHandler)
    def predict(self, tensor):
        """Run the 'model' artifact on `tensor` and return its output unchanged.

        Args:
            tensor: Input forwarded as-is to the packed model.

        Returns:
            Whatever the model artifact returns for `tensor`.
        """
        outputs = self.artifacts.model(tensor)
        return outputs


Overwriting tensorflow_echo.py


In [12]:
# Create the service, attach the Keras model as the "model" artifact, and save
# it; `saved_path` is reused by the serve and pip-install cells further down.
from tensorflow_echo import EchoServicer
bento_svc = EchoServicer()
bento_svc.pack("model", custom_model)
saved_path = bento_svc.save()

INFO:tensorflow:Assets written to: /tmp/bentoml-temp-mic06xz3/EchoServicer/artifacts/model_saved_model/assets
running sdist
running egg_info
writing BentoML.egg-info/PKG-INFO
writing dependency_links to BentoML.egg-info/dependency_links.txt
writing entry points to BentoML.egg-info/entry_points.txt
writing requirements to BentoML.egg-info/requires.txt
writing top-level names to BentoML.egg-info/top_level.txt
reading manifest file 'BentoML.egg-info/SOURCES.txt'
reading manifest template 'MANIFEST.in'


no previously-included directories found matching 'examples'
no previously-included directories found matching 'tests'
no previously-included directories found matching 'docs'


writing manifest file 'BentoML.egg-info/SOURCES.txt'
running check





creating BentoML-0.5.2+41.g2222433.dirty
creating BentoML-0.5.2+41.g2222433.dirty/BentoML.egg-info
creating BentoML-0.5.2+41.g2222433.dirty/bentoml
creating BentoML-0.5.2+41.g2222433.dirty/bentoml/artifact
creating BentoML-0.5.2+41.g2222433.dirty/bentoml/bundler
creating BentoML-0.5.2+41.g2222433.dirty/bentoml/cli
creating BentoML-0.5.2+41.g2222433.dirty/bentoml/clipper
creating BentoML-0.5.2+41.g2222433.dirty/bentoml/configuration
creating BentoML-0.5.2+41.g2222433.dirty/bentoml/deployment
creating BentoML-0.5.2+41.g2222433.dirty/bentoml/deployment/aws_lambda
creating BentoML-0.5.2+41.g2222433.dirty/bentoml/deployment/sagemaker
creating BentoML-0.5.2+41.g2222433.dirty/bentoml/handlers
creating BentoML-0.5.2+41.g2222433.dirty/bentoml/migrations
creating BentoML-0.5.2+41.g2222433.dirty/bentoml/migrations/versions
creating BentoML-0.5.2+41.g2222433.dirty/bentoml/proto
creating BentoML-0.5.2+41.g2222433.dirty/bentoml/repository
creating BentoML-0.5.2+41.g2222433.dirty/bentoml/server
creat

### Test packed BentoML service

In [33]:
# Call the predict API in-process with a plain Python list.
bento_svc.predict([1, 2, 3])

<tf.Tensor: id=599, shape=(3,), dtype=int32, numpy=array([1, 2, 3], dtype=int32)>

### Run REST API server locally

In [21]:
# Start the BentoML API server for the service saved at `saved_path`.
!bentoml serve {saved_path}

2019-12-20 11:40:24.857641: I tensorflow/core/platform/cpu_feature_guard.cc:145] This TensorFlow binary is optimized with Intel(R) MKL-DNN to use the following CPU instructions in performance critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in non-MKL-DNN operations, rebuild TensorFlow with the appropriate compiler flags.
2019-12-20 11:40:24.879917: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2712000000 Hz
2019-12-20 11:40:24.880228: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55f9480ba9c0 executing computations on platform Host. Devices:
2019-12-20 11:40:24.880265: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): Host, Default Version
2019-12-20 11:40:24.880511: I tensorflow/core/common_runtime/process_util.cc:115] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
 * Serving Flask app "EchoServicer" (lazy loading)
 * Envir

### Send prediction request to REST API server

*Run the following command in a terminal to make an HTTP request to the API server:*
```bash
curl -i \
--header "Content-Type: application/json" \
--request POST \
--data '{"instances": [[1, 2]]}' \
localhost:5000/predict
```

In [22]:
import requests
import json
# Build the JSON request body and post it to the local BentoML server.
headers = {"content-type": "application/json"}
data = json.dumps({"instances": [[1, 2, 2, 3], [2, 3, 3, 4]]})
print('Data: {} ... {}'.format(data[:50], data[len(data)-52:]))

json_response = requests.post(
    'http://127.0.0.1:5000/predict',
    data=data,
    headers=headers,
)
# Show the HTTP status line first, then the raw response body.
print(json_response)
print(json_response.text)


Data: {"instances": [[1, 2, 2, 3], [2, 3, 3, 4]]} ... , 3, 4]]}
<Response [200]>
{"predictions": [[1, 2, 2, 3], [2, 3, 3, 4]]}


# "pip install" a BentoML archive

BentoML users can directly `pip install` a saved BentoML archive with `pip install $SAVED_PATH` and use it as a regular Python package.

In [None]:
!pip install {saved_path}

In [19]:
# NOTE: this rebinds the name `EchoServicer` from the class imported from
# tensorflow_echo in an earlier cell to the module imported here.
import EchoServicer

# Load the service through the imported module's `load()` entry point.
installed_svc = EchoServicer.load()

In [20]:
# Call the installed service's predict API with an "instances" payload.
# NOTE(review): the earlier in-process call passes a plain list to predict;
# confirm the dict form is accepted when calling the API directly.
installed_svc.predict({'instances': [[1, 2]]})

array([[1]], dtype=int32)

# CLI access

`pip install $SAVED_PATH` also installs a CLI tool for accessing the BentoML service

In [None]:
# Show the usage text of the EchoServicer command-line tool.
!EchoServicer --help

### Print model service information:

In [None]:
# Run the CLI's `info` subcommand for the installed service.
!EchoServicer info

### Run 'predict' api with json data:

In [None]:
# Invoke the `predict` API from the shell with an inline JSON payload.
!EchoServicer predict --input='{"instances": [[1, 2]]}'

Additional: Serve with tf-serving
----
BentoML's TensorFlow handler and artifact follow the TensorFlow Serving REST API.  
To install tensorflow-serving, see: https://www.tensorflow.org/tfx/serving/setup


In [28]:
# Export location for TensorFlow Serving: the model is saved into a version
# subdirectory (TMP_MODEL_DIR/TMP_MODEL_VERSION) and the server is pointed at
# the parent directory through --model_base_path.
TMP_MODEL_DIR = "/tmp/test-echo-model"
TMP_MODEL_VERSION = "1"
TMP_MODEL_DIR_V = f"{TMP_MODEL_DIR}/{TMP_MODEL_VERSION}"
MODEL_NAME = "echo_model"

tf.saved_model.save(custom_model, TMP_MODEL_DIR_V)
# Launch tensorflow_model_server with its REST API on port 5001.
!tensorflow_model_server --rest_api_port=5001 --model_name={MODEL_NAME} --model_base_path={TMP_MODEL_DIR}


INFO:tensorflow:Assets written to: /tmp/test-echo-model/2/assets
2019-12-20 12:03:01.458521: I tensorflow_serving/model_servers/server.cc:85] Building single TensorFlow model file config:  model_name: echo_model model_base_path: /tmp/test-echo-model
2019-12-20 12:03:01.458658: I tensorflow_serving/model_servers/server_core.cc:462] Adding/updating models.
2019-12-20 12:03:01.458673: I tensorflow_serving/model_servers/server_core.cc:573]  (Re-)adding model: echo_model
2019-12-20 12:03:01.559267: I tensorflow_serving/core/basic_manager.cc:739] Successfully reserved resources to load servable {name: echo_model version: 2}
2019-12-20 12:03:01.559323: I tensorflow_serving/core/loader_harness.cc:66] Approving load for servable version {name: echo_model version: 2}
2019-12-20 12:03:01.559349: I tensorflow_serving/core/loader_harness.cc:74] Loading servable version {name: echo_model version: 2}
2019-12-20 12:03:01.559384: I external/org_tensorflow/tensorflow/cc/saved_model/reader.cc:31] Reading

In [32]:
import requests
import json

# These constants repeat the values from the serving cell above; presumably
# so that this cell can be run without re-executing it.
TMP_MODEL_DIR = "/tmp/test-echo-model"
TMP_MODEL_VERSION = "1"
TMP_MODEL_DIR_V = f"{TMP_MODEL_DIR}/{TMP_MODEL_VERSION}"
MODEL_NAME = "echo_model"

headers = {"content-type": "application/json"}
data = json.dumps(
    {"instances": [[1, 2, 2, 3], [2, 3, 3, 4]]}
)
print('Data: {} ... {}'.format(data[:50], data[len(data)-52:]))

# The "v1" path segment is the TensorFlow Serving REST API version, not the
# model version, so it must not be built from TMP_MODEL_VERSION. Without a
# /versions/<n> segment the server answers with the latest loaded version.
json_response = requests.post(f'http://127.0.0.1:5001/v1/models/{MODEL_NAME}:predict',
                              data=data, headers=headers)
print(json_response)
print(json_response.text)


Data: {"instances": [[1, 2, 2, 3], [2, 3, 3, 4]]} ... , 3, 4]]}
<Response [200]>
{
    "predictions": [[1.0, 2.0, 2.0, 3.0], [2.0, 3.0, 3.0, 4.0]
    ]
}
