# Basic classification: Classify images of clothing
A TensorFlow Serving-style service example using BentoML


![Impression](https://www.google-analytics.com/collect?v=1&tid=UA-112879361-3&cid=555&t=event&ec=tensorflow&ea=tensorflow_1_fashion_mnist&dt=tensorflow_1_fashion_mnist)

In [1]:
# (Re)load the autoreload extension and select mode 2, so edited modules are
# re-imported before each cell executes.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [4]:
!pip install -q bentoml tensorflow==1.14.0 matplotlib "numpy<1.17"
# why numpy<1.17: https://github.com/tensorflow/tensorflow/issues/30427

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import io

# TensorFlow
import tensorflow as tf

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
print(tf.__version__)

1.14.0


In [2]:
# Load the Fashion-MNIST train/test splits through the Keras dataset helper.
fashion_mnist = tf.keras.datasets.fashion_mnist
(_train_images, train_labels), (_test_images, test_labels) = fashion_mnist.load_data()

# Human-readable name for each label id (the list index is the label id).
class_names = [
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot',
]

# Scale the raw pixel values by 1/255
# (presumably 0-255 integers -> floats in [0, 1]; confirm against the dataset).
train_images, test_images = _train_images / 255.0, _test_images / 255.0

# Give every image an explicit trailing channel axis: (N, 28, 28, 1).
train_x = np.reshape(train_images, [-1, 28, 28, 1])
test_x = np.reshape(test_images, [-1, 28, 28, 1])

# One-hot encode the integer labels by picking rows of a 10x10 identity matrix.
train_y = np.eye(10)[train_labels]
test_y = np.eye(10)[test_labels]

In [3]:
# Image tensor shape handed to cnn_model_fn as `input_shape`; the leading None
# stands for the (unspecified) batch dimension.
input_shape = [None, 28, 28, 1]
# Number of target classes handed to cnn_model_fn as `number_of_classes`.
number_of_classes = 10

In [4]:
def cnn_model_fn(input_shape, number_of_classes, learning_rate):
    """Build the TensorFlow 1.x graph of a small CNN classifier fed by PNG bytes.

    Architecture:
        image -> conv2d -> maxpooling -> conv2d -> maxpooling -> flatten
              -> dense -> dropout -> logits -> softmax

    The graph starts at a string placeholder holding PNG-encoded images. Each
    image is decoded, cast to float32 and mapped to ``(255 - pixel) / 255``.
    The reshaped result is returned under the key ``"x"``; callers may feed
    already-prepared float arrays straight into that tensor through
    ``feed_dict`` (bypassing the PNG decoding), or feed PNG bytes into
    ``"raw_x"``.

    Args:
        input_shape: image tensor shape with the batch dimension first, e.g.
            ``[None, 28, 28, 1]``. Only the entries after the first are used:
            they fix the reshape of the decoded images, and the last entry is
            the number of channels requested from the PNG decoder.
        number_of_classes: width of the one-hot label placeholder and of the
            logits layer.
        learning_rate: learning rate passed to ``tf.train.AdamOptimizer``.

    Returns:
        dict with the graph handles:
            "logits"      - un-normalised class scores,
            "predictions" - dict with "classes" (argmax of the logits) and
                            "probabilities" (softmax of the logits),
            "loss"        - softmax cross-entropy between labels and logits,
            "train_op"    - Adam step minimising the loss,
            "accuracy"    - mean of argmax(logits) == argmax(labels),
            "raw_x"       - string placeholder for PNG-encoded images,
            "x"           - float image tensor fed to the first convolution,
            "y"           - float placeholder for one-hot labels,
            "train_mode"  - bool placeholder switching dropout on/off.
    """
    image_dims = list(input_shape[1:])  # everything except the batch dimension

    raw = tf.placeholder(tf.string, shape=[None])
    with tf.device("/cpu:0"):  # map_fn has issues on GPU https://github.com/tensorflow/tensorflow/issues/28007
        img_array = tf.map_fn(
            lambda png: tf.io.decode_png(png, channels=image_dims[-1]),
            raw,
            dtype=tf.uint8)
    img_array = tf.cast(img_array, tf.float32)
    # Invert and rescale the decoded 8-bit pixel values.
    img_array = (255.0 - img_array) / 255.0

    input_layer = tf.reshape(img_array, [-1] + image_dims)

    labels = tf.placeholder(tf.float32, shape=[None, number_of_classes])

    # Train mode is used with the dropout layer: dropout is only active while
    # training and is effectively disabled for evaluation and prediction.
    train_mode = tf.placeholder(tf.bool)

    # convolution layer 1
    conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=32,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    # pooling layer 1
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    # convolution layer 2
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=64,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    # pooling layer 2
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # Flatten the output volume of pool2 into a vector. The size comes from the
    # static shape (7*7*64 for 28x28 inputs) instead of being hard-coded, so it
    # stays correct when input_shape changes.
    flat_size = pool2.shape[1:].num_elements()
    pool2_flat = tf.reshape(pool2, shape=[-1, flat_size])

    # dense layer
    dense = tf.layers.dense(
        inputs=pool2_flat,
        units=1024,
        activation=tf.nn.relu)

    # dropout regularization
    dropout = tf.layers.dropout(
        inputs=dense,
        rate=0.3,
        training=train_mode)

    # logits layer: one unit per class, so it always matches the label placeholder
    logits = tf.layers.dense(inputs=dropout, units=number_of_classes)

    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        "probabilities": tf.nn.softmax(logits=logits)
    }

    # loss
    loss = tf.losses.softmax_cross_entropy(labels, logits)

    # training operation
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

    # accuracy
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1)), tf.float32))

    return { "logits": logits,
             "predictions": predictions,
             "loss": loss,
             "train_op": train_op,
             "accuracy": accuracy,
             "raw_x": raw,
             "x": input_layer,
             "y": labels,
             "train_mode": train_mode }

In [5]:
# Hyper-parameters of the training run.
learning_rate = 0.01
batch_size = 1000
epoch = 5

# Clear the default graph first, so re-running this cell does not add a second
# copy of the model next to the previous one.
tf.reset_default_graph()
cnn_model = cnn_model_fn(input_shape, number_of_classes, learning_rate)

# Short handles for the tensors used as feed_dict keys by the training cell.
x, y, train_mode = cnn_model["x"], cnn_model["y"], cnn_model["train_mode"]

Instructions for updating:
Use `tf.keras.layers.Conv2D` instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use keras.layers.MaxPooling2D instead.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use keras.layers.dropout instead.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


## Train the model


In [7]:
!rm -r test_model

In [8]:
# Train the CNN with mini-batches, report per-epoch metrics, evaluate on the
# test split and export the trained graph as a SavedModel in ./test_model.
with tf.Session() as sess:
    # NOTE(review): ops created inside this block are pinned to /gpu:0;
    # presumably this needs a visible GPU (or soft placement) -- confirm.
    with tf.device("/gpu:0"):
        sess.run(tf.global_variables_initializer())

        # Divide the training set into mini batches of size batch_size. Ceil
        # division counts the final, shorter batch that exists when the number
        # of examples is not an exact multiple of batch_size; the slices below
        # are simply clipped at the end of the arrays for that last batch.
        total_size = train_x.shape[0]
        number_of_batches = (total_size + batch_size - 1) // batch_size

        print("Training:Start")
        for e in range(epoch):
            epoch_cost = 0
            epoch_accuracy = 0
            print("Epoch {}:".format(e+1))
            for i in range(number_of_batches):
                print("#", end='')
                mini_x = train_x[i*batch_size:(i+1)*batch_size, :, :, :]
                mini_y = train_y[i*batch_size:(i+1)*batch_size, :]
                # One optimisation step with dropout switched on ...
                _, cost = sess.run([cnn_model["train_op"], cnn_model["loss"]],
                    feed_dict={x:mini_x,
                               y:mini_y,
                               train_mode:True})
                # ... then the accuracy on the same batch with dropout switched off.
                train_accuracy = sess.run(cnn_model["accuracy"],
                    feed_dict={x:mini_x,
                               y:mini_y,
                               train_mode:False})
                epoch_cost += cost
                epoch_accuracy += train_accuracy

            # Average both metrics over the number of batches actually run
            # (including the shorter final batch, if any).
            epoch_cost /= number_of_batches
            epoch_accuracy /= number_of_batches

            print()
            print("loss: {:02.2f} accuracy: {:02.2f} ".format(np.squeeze(epoch_cost), epoch_accuracy))
            # Validation loss and accuracy, computed on the test split
            cv_loss, cv_accuracy = sess.run([cnn_model["loss"], cnn_model["accuracy"]],
                                        {x:test_x,
                                         y:test_y,
                                         train_mode: False})
            print("val_loss: {:02.2f} val_accuracy: {:02.2f}".format(np.squeeze(cv_loss), cv_accuracy))

        print("Training:End")


        #prediction for test set
        test_accuracy, prediction = sess.run([cnn_model["accuracy"],
                                              cnn_model["predictions"]["classes"]],
                                             {x:test_x, y:test_y, train_mode:False})
        print("Test accuracy {:02.2f}".format(test_accuracy))

    # Export with the raw PNG-bytes placeholder (not the float tensor used for
    # training) as the "x" input, plus the dropout switch as "train_mode".
    with tf.device("/cpu:0"):
        inputs = {"x":cnn_model['raw_x'], "train_mode":cnn_model['train_mode']}
        outputs = {"prediction": cnn_model['predictions']['classes']}
        tf.saved_model.simple_save(sess, 'test_model', inputs=inputs, outputs=outputs)

Training:Start
Epoch 1:
############################################################
loss: 0.86 accuracy: 0.75 
val_loss: 0.40 val_accuracy: 0.85
Epoch 2:
############################################################
loss: 0.35 accuracy: 0.88 
val_loss: 0.32 val_accuracy: 0.88
Epoch 3:
############################################################
loss: 0.29 accuracy: 0.90 
val_loss: 0.29 val_accuracy: 0.89
Epoch 4:
############################################################
loss: 0.26 accuracy: 0.91 
val_loss: 0.27 val_accuracy: 0.90
Epoch 5:
############################################################
loss: 0.24 accuracy: 0.92 
val_loss: 0.28 val_accuracy: 0.90
Training:End
Test accuracy 0.90
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: test_model/saved_model.pb


# Model inference test run (restart the IPython kernel before running this section!)


In [12]:
# Human-readable name for each label id (the list index is the label id).
class_names = [
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot',
]

# Raw bytes of the sample image, exactly as stored on disk.
with open("test.png", mode="rb") as png_file:
    img_bytes = png_file.read()

In [2]:
import tensorflow as tf

# Switch TF1 to eager mode; the loaded signature is called like a function below.
tf.compat.v1.enable_eager_execution()

# Load the exported SavedModel and look up its default serving signature.
loaded = tf.compat.v2.saved_model.load('test_model')
serving_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
loaded_func = loaded.signatures[serving_key]

# Call the signature on a batch holding the single sample PNG, with the
# train_mode input set to False.
pred = loaded_func(
    x=tf.constant([img_bytes], dtype=tf.string),
    train_mode=tf.constant(False),
)
output = pred['prediction']
[class_names[c] for c in output]

['Ankle boot']

And the model predicts a label as expected.

# Create BentoService class

In [3]:
%%writefile tensorflow_1_fashion_mnist.py

import bentoml
import tensorflow as tf

from bentoml.frameworks.tensorflow import TensorflowSavedModelArtifact
from bentoml.adapters import TfTensorInput

tf.compat.v1.enable_eager_execution() # required

# Class name for each label id; indexed with the class ids the model returns.
FASHION_MNIST_CLASSES = [
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot',
]


@bentoml.env(pip_dependencies=['tensorflow', 'numpy', 'pillow'])
@bentoml.artifacts([TensorflowSavedModelArtifact('trackable')])
class FashionMnistTensorflow(bentoml.BentoService):
    """BentoService exposing the SavedModel packed as the `trackable` artifact."""

    @bentoml.api(input=TfTensorInput(), batch=True)
    def predict(self, inputs):
        """Return the class name predicted for every item of the input batch.

        `inputs` is passed unchanged as the `x` argument of the artifact's
        default serving signature, together with `train_mode=False`; the
        signature's 'prediction' output is then mapped through
        FASHION_MNIST_CLASSES.
        """
        signature_key = tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY
        serving_fn = self.artifacts.trackable.signatures[signature_key]
        class_ids = serving_fn(x=inputs, train_mode=tf.constant(False))['prediction']
        return [FASHION_MNIST_CLASSES[class_id] for class_id in class_ids]

Overwriting tensorflow_1_fashion_mnist.py


In [13]:
from tensorflow_1_fashion_mnist import FashionMnistTensorflow

# Instantiate the service and attach the exported SavedModel directory as its
# "trackable" artifact.
bento_svc = FashionMnistTensorflow()
bento_svc.pack("trackable", "test_model/")
# Persist the service bundle; the return value of save() is kept in saved_path
# (presumably the bundle's location on disk -- confirm).
saved_path = bento_svc.save()

[2020-09-23 02:37:08,468] INFO - Detected non-PyPI-released BentoML installed, copying local BentoML modulefiles to target saved bundle path..


  if version != normalized:
no previously-included directories found matching 'e2e_tests'
no previously-included directories found matching 'tests'
no previously-included directories found matching 'benchmark'


UPDATING BentoML-0.9.0rc0+7.g8af1c8b/bentoml/_version.py
set BentoML-0.9.0rc0+7.g8af1c8b/bentoml/_version.py to '0.9.0.pre+7.g8af1c8b'
[2020-09-23 02:37:09,283] INFO - BentoService bundle 'FashionMnistTensorflow:20200923023648_D5975E' saved to: /home/bentoml/bentoml/repository/FashionMnistTensorflow/20200923023648_D5975E


## Use BentoService with BentoML CLI

**`bentoml get <BentoService name>` lists all versions of a BentoService**

In [14]:
# List every saved version of the FashionMnistTensorflow service.
!bentoml get FashionMnistTensorflow

[39mBENTO_SERVICE                                 AGE           APIS                                  ARTIFACTS                                LABELS
FashionMnistTensorflow:20200923023648_D5975E  7.73 seconds  predict<TfTensorInput:DefaultOutput>  trackable<TensorflowSavedModelArtifact>[0m


**`bentoml get <BentoService name>:<BentoService version>` displays detailed information about a specific BentoService version**

In [15]:
# Show the metadata of the most recently saved version (the `latest` tag).
!bentoml get FashionMnistTensorflow:latest 

[2020-09-23 02:37:28,334] INFO - Getting latest version FashionMnistTensorflow:20200923023648_D5975E
[39m{
  "name": "FashionMnistTensorflow",
  "version": "20200923023648_D5975E",
  "uri": {
    "type": "LOCAL",
    "uri": "/home/bentoml/bentoml/repository/FashionMnistTensorflow/20200923023648_D5975E"
  },
  "bentoServiceMetadata": {
    "name": "FashionMnistTensorflow",
    "version": "20200923023648_D5975E",
    "createdAt": "2020-09-22T18:37:09.233305Z",
    "env": {
      "condaEnv": "name: bentoml-default-conda-env\nchannels:\n- conda-forge\n- defaults\ndependencies:\n- pip\n",
      "pythonVersion": "3.6.10",
      "dockerBaseImage": "bentoml/model-server:0.9.0.pre-py36",
      "pipPackages": [
        "bentoml==0.9.0.pre",
        "tensorflow==1.14.0",
        "numpy==1.16.6",
        "pillow==7.2.0"
      ]
    },
    "artifacts": [
      {
        "name": "trackable",
        "artifactType": "TensorflowSavedModelArtifact"
      }
    ],
    "apis": [
      {
        "name": 

**Serve the BentoML REST server locally**

In [2]:
# Start the REST API server for the latest saved bundle; the query cell further
# down posts to it at http://localhost:5000/predict.
!bentoml serve FashionMnistTensorflow:latest

[2020-09-23 02:38:47,885] INFO - Getting latest version FashionMnistTensorflow:20200923023648_D5975E
[2020-09-23 02:38:47,886] INFO - Starting BentoML API server in development mode..
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
2020-09-23 02:38:50.293801: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcuda.so.1
2020-09-23 02:38:50.308955: I tensorflow/stream_executor/

## Query the REST API with Python

In [8]:
import base64
import json
import requests

# Read the sample image and base64-encode it, so the binary PNG can travel
# inside a JSON document.
with open("test.png", "rb") as f:
    img_bytes = f.read()
img_b64 = base64.b64encode(img_bytes).decode()


# Request body: a list of instances, each carrying its payload under "b64".
headers = {"content-type": "application/json"}
data = json.dumps(
       {"instances": [{"b64": img_b64}]}
)
# Print only the head and tail of the payload; the base64 blob in between is long.
print('Data: {} ... {}'.format(data[:50], data[-52:]))

# Without a timeout, requests waits forever when the server is not running or
# stops responding; fail after 30 seconds instead.
json_response = requests.post('http://localhost:5000/predict', data=data, headers=headers, timeout=30)
print(json_response)
print(json_response.text)

Data: {"instances": [{"b64": "iVBORw0KGgoAAAANSUhEUgAAAB ... ufkz8DPG//sD/AX8I8DvdgnOxdB4B1wAAAAASUVORK5CYII="}]}
<Response [200]>
["Ankle boot"]


## Build a real-time prediction service in Docker with BentoService

In [6]:
# Build a Docker image tagged fashionmnisttensorflow:latest from the latest saved bundle.
!bentoml containerize FashionMnistTensorflow:latest -t fashionmnisttensorflow:latest

[2020-09-23 02:48:05,458] INFO - Getting latest version FashionMnistTensorflow:20200923023648_D5975E
[39mFound Bento: /home/bentoml/bentoml/repository/FashionMnistTensorflow/20200923023648_D5975E[0m
Building Docker image fashionmnisttensorflow:latest from FashionMnistTensorflow:latest 
|[39mStep 1/15 : FROM bentoml/model-server:0.9.0.pre-py36[0m
[39m ---> 4aac43d10e50[0m
[39mStep 2/15 : ARG EXTRA_PIP_INSTALL_ARGS=[0m
[39m ---> Using cache[0m
[39m ---> 790054f5ad85[0m
[39mStep 3/15 : ENV EXTRA_PIP_INSTALL_ARGS $EXTRA_PIP_INSTALL_ARGS[0m
[39m ---> Using cache[0m
[39m ---> 85b0a1b40542[0m
[39mStep 4/15 : COPY environment.yml requirements.txt setup.sh* bentoml-init.sh python_version* /bento/[0m
[39m ---> Using cache[0m
[39m ---> 416fc81180f3[0m
[39mStep 5/15 : WORKDIR /bento[0m
[39m ---> Using cache[0m
[39m ---> 02b613c56380[0m
[39mStep 6/15 : RUN chmod +x /bento/bentoml-init.sh[0m
[39m ---> Using cache[0m
[39m ---> 6a6ef17c3452[0m
[39mStep 7/15 : RUN i

In [15]:
# Run the image with container port 5000 published on host port 5000; the
# arguments after the image name ask for one worker with micro-batching enabled.
!docker run -p 5000:5000 fashionmnisttensorflow:latest --workers 1 --enable-microbatch

[2020-09-22 18:56:21,412] INFO - Starting BentoML API server in production mode..
[2020-09-22 18:56:21,638] INFO - Running micro batch service on :5000
[2020-09-22 18:56:21 +0000] [12] [INFO] Starting gunicorn 20.0.4
[2020-09-22 18:56:21 +0000] [1] [INFO] Starting gunicorn 20.0.4
[2020-09-22 18:56:21 +0000] [12] [INFO] Listening at: http://0.0.0.0:5000 (12)
[2020-09-22 18:56:21 +0000] [12] [INFO] Using worker: aiohttp.worker.GunicornWebWorker
[2020-09-22 18:56:21 +0000] [1] [INFO] Listening at: http://0.0.0.0:54525 (1)
[2020-09-22 18:56:21 +0000] [1] [INFO] Using worker: sync
[2020-09-22 18:56:21 +0000] [13] [INFO] Booting worker with pid: 13
[2020-09-22 18:56:21 +0000] [14] [INFO] Booting worker with pid: 14
[2020-09-22 18:56:21,687] INFO - Micro batch enabled for API `predict`
[2020-09-22 18:56:21,687] INFO - Your system nofile limit is 1048576, which means each instance of microbatch service is able to hold this number of connections at same time. You can increase the number of file

# Deploy BentoService as REST API server to the cloud


BentoML supports deployment to multiple cloud provider services, such as AWS Lambda, AWS SageMaker, and Google Cloud Run. You can find the full list and guides on the documentation site at https://docs.bentoml.org/en/latest/deployment/index.html

For this demo, we are going to deploy to AWS SageMaker.

In [None]:
# "<name>:<version>" tag identifying the bundle saved earlier in this notebook.
bento_service_tag = '{}:{}'.format(bento_svc.name, bento_svc.version)
print(bento_service_tag)

In [None]:
# Deploy the saved bundle to SageMaker as "first-tf-fashion", serving the
# `predict` API. IPython substitutes {bento_service_tag} with the Python variable.
!bentoml sagemaker deploy first-tf-fashion -b {bento_service_tag} --api-name predict --verbose

In [None]:
# Show the current state of the "first-tf-fashion" deployment.
!bentoml sagemaker get first-tf-fashion

In [None]:
# Invoke the deployed endpoint with a base64-encoded PNG inlined in the JSON
# body, write the response to output.json and print it.
# NOTE(review): the endpoint name carries a "dev-" prefix that the deployment
# name used in the deploy cell does not -- presumably BentoML's default
# namespace; confirm against `bentoml sagemaker get`.
!aws sagemaker-runtime invoke-endpoint --endpoint-name dev-first-tf-fashion --content-type 'application/json' \
--body "{\"instances\":[{\"b64\":\"iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAYAAAByDd+UAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAA2dJREFUSIntlk9L60oYh59Mp43axJoiVj2KboSuXPoJXLkQ/Fx+Ahe6ceFG3CtdWlFcFf9RKlhFatpSbWuaNLHJnIW3OR7ugcu9BzxccGAgZCbv887v/b1JNKWU4hOH+EzYF/AL+OeBYRjysct83wegUqn8HlApRRRFADw+PrK/v4/jOCQSCTRNi/fpug7AwcHB7wEBhHh/9Pj4mGKxyM7Ozt/2NBoNdnd3MU0zvif/CywMQ6SUnJ+fc3NzQy6Xo1KpsLGxgWVZ9Pt9FhYWaLVadLtd5ubmfiT6b2FRFCGlxHEc9vf3UUrR7/d5fX1FKRXPq6srpJRYlsVgMPhn4LD4URTF12EYxlJubW2Ry+WYnp7G8zz6/T65XC6uYzqdRtd1fN+n2+3iOM6vgcPgw+ILIdA0jTAMSSQSAOzt7WHbNtPT0xiGQbvdJpvNMjU1RTKZJAxD3t7e4ni9Xo/b29tfA4egKIoYDAZxAkPYzs4OpVKJ+fl5Wq0W7XYbz/PIZDK8vr6iaRpjY2Mkk0mUUnG8w8ND4INphjbXNA2lFEKIWD6AWq3GwcEBnuextLSE4zj4vk+r1SKVSqFpGq7rxsnpuo4QgnQ6jRCCYrH4DhxK9TH4MKtms0m1WqVcLvP09EQqlWJ8fJx2u0232+Xt7Q3f9xFCUK1WGQwGTExMkEwmEUKglGJ0dJQwDDEMg8vLS+RQqnq9zv39Pb1ej16vh+d53N3d4bouUkpM0ySKIjqdDp7nIaXEdV1GR0fRdZ0gCJidnaXT6eC6LpZl4TgOLy8vpNNpbNvm+fn5XdJCoUCtVkNKSbPZjA0yBDmOg23bKKXwfR/LsoiiCMdxCMOQdDqNYRhkMhkajUaslGVZCCHwPI8gCJBSIo+Ojtje3iafzzMzMxOfJJVKxe9G0zQJggAhRNxvnuehaRpRFGHbNvV6nevra4IgIAxDAAzDwHVddF3HMAympqaQKysrnJ6ecnFx8aOwf50sm82SzWbJZDIEQYBSilarRblcxnVdut0umqZRKpVYXl5mcXGRQqGA7/uxD6SUfPv2DdM031388SfKcRzOzs4ol8ucnJzQbDZ/atqhA7PZLPl8ntXVVdbW1hgZGYnX19fXeXh4YHJyEtM0MU0TKSW6rrO5ufkz8DPG//sD/AX8I8DvdgnOxdB4B1wAAAAASUVORK5CYII=\"}]}" \
output.json && cat output.json

In [None]:
# Delete the "first-tf-fashion" deployment once the demo is finished.
!bentoml sagemaker delete first-tf-fashion --force