# Serving

In [None]:
# Install the notebook's dependencies quietly (-q).
# grpcio is pinned to 1.26.0 and force-upgraded (-U); the other packages are
# installed at whatever version pip resolves.
!pip3 install -Uq grpcio==1.26.0
!pip3 install -q matplotlib
!pip3 install -q tensorflow-datasets
!pip3 install -q requests

## 0. Model training

In [None]:
import os
import json
import requests
import subprocess
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt

**as_supervised**: Returns tuple `(img, label)` instead of dict `{'image': img, 'label': label}`

In [None]:
# Load the Fashion-MNIST train/test splits as (image, label) tuples, together
# with the dataset metadata object.
(ds_train, ds_test), ds_info = tfds.load(
    'fashion_mnist',
    split=['train', 'test'],
    with_info=True,
    as_supervised=True,
    shuffle_files=True,
)
# Sanity check: both splits must be tf.data pipelines.
assert isinstance(ds_train, tf.data.Dataset)
assert isinstance(ds_test, tf.data.Dataset)

In [None]:
# Visual sanity check: display a few samples from the training split.
tfds.visualization.show_examples(ds_train, ds_info)

In [None]:
#tfds.core.benchmark(ds_train, batch_size=32)

In [None]:
@tf.function
def normalize_img(image, label):
  """Casts `image` to `float32` and divides it by 255.

  Args:
    image: Image tensor to rescale.
    label: Label associated with the image; passed through untouched.

  Returns:
    A `(scaled_image, label)` tuple.
  """
  scaled = tf.cast(image, tf.float32) / 255.
  return scaled, label

In [None]:
# Training input pipeline: normalize -> cache -> shuffle over the whole
# training split -> batch -> prefetch.
ds_train = (
    ds_train
    .map(normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .cache()
    .shuffle(ds_info.splits['train'].num_examples)
    .batch(128)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

In [None]:
# Benchmark the training input pipeline. `batch_size` must match the
# `.batch(128)` applied to `ds_train`, otherwise the reported examples/sec
# is computed from the wrong number of examples per iteration.
tfds.core.benchmark(ds_train, batch_size=128)

In [None]:
# Evaluation input pipeline: normalize -> batch -> cache -> prefetch.
# No shuffling is applied to the test split.
ds_test = (
    ds_test
    .map(normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(128)
    .cache()
    .prefetch(tf.data.experimental.AUTOTUNE)
)

In [None]:
# Small CNN classifier: two strided convolutions, then a 10-way softmax layer.
model = tf.keras.Sequential([
  # 28x28x1 input image; 8 filters, 3x3 kernel, stride 2, 'same' padding.
  tf.keras.layers.Conv2D(
      filters=8, kernel_size=3, strides=2, padding='same',
      activation='relu', input_shape=(28,28,1), name='Conv1'),
  # Second strided convolution (no explicit padding argument).
  tf.keras.layers.Conv2D(
      filters=8, kernel_size=3, strides=2,
      activation='relu', input_shape=(14,14,8), name='Conv2'),
  tf.keras.layers.Flatten(),
  # Output layer: one probability per class (10 classes).
  tf.keras.layers.Dense(10, activation=tf.nn.softmax, name='Softmax'),
])
model.summary()

In [None]:
# The final Dense layer ('Softmax') already applies a softmax activation, so
# the model outputs probabilities, not logits. The loss must therefore be
# built with from_logits=False; from_logits=True would apply a second softmax
# inside the loss and distort the training signal.
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)
model.summary()

In [None]:
# Train for 5 epochs, evaluating on the test split after each epoch.
model.fit(ds_train, epochs=5, validation_data=ds_test)

**SavedModel directory**

A SavedModel directory always follows the same structure:

```
assets/
assets.extra/
variables/
    variables.data-?????-of-?????
    variables.index
saved_model.pb|saved_model.pbtxt
```

where:
   * **assets** is a subfolder containing auxiliary (external) files, such as vocabularies. Assets are copied to the SavedModel location and can be read when loading a specific MetaGraphDef.
   * **assets.extra** is a subfolder where higher-level libraries and users can add their own assets that co-exist with the model, but are not loaded by the graph. This subfolder is not managed by the SavedModel libraries.
   * **variables** is a subfolder that includes output from tf.train.Saver.
   * **saved_model.pb** or **saved_model.pbtxt** is the SavedModel protocol buffer. It includes the graph definitions as MetaGraphDef protocol buffers.


In [None]:
# Fetch the Keras session and save the model
# The signature definition is defined by the input and output tensors,
# and stored with the default serving key
import tempfile

MODEL_DIR = tempfile.gettempdir()
version = 1
export_path = os.path.join(MODEL_DIR, str(version))
print('export_path = {}\n'.format(export_path))

tf.keras.models.save_model(
    model,
    export_path,
    overwrite=True,
    include_optimizer=True,
    save_format=None,
    signatures=None,
    options=None
)

print('\nSaved model:')
!ls -l {export_path}

In [None]:
# Inspect the exported SavedModel with `--all` (full dump of what it contains).
!saved_model_cli show --dir {export_path} --all

In [None]:
# Inspect the exported SavedModel without any filter (top-level summary only).
!saved_model_cli show --dir {export_path}

In [None]:
# Inspect the exported SavedModel restricted to the `serve` tag-set.
!saved_model_cli show --dir {export_path} --tag_set serve

## 2. Serving a simple model

In [None]:
%%capture
# Output of this cell is captured (hidden) by the %%capture magic above.
# 1. Make sure curl is available.
# 2. Register the TensorFlow Serving apt source in
#    /etc/apt/sources.list.d/tensorflow-serving.list and add its signing key.
# 3. Refresh the package index so the new source is picked up.
!apt-get install -y curl
!echo "deb http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal" | tee /etc/apt/sources.list.d/tensorflow-serving.list && curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | apt-key add -
!apt update

In [None]:
!apt-get install tensorflow-model-server

Usually, the easiest way to do this is using the docker image. In this case, we will launch the server from this notebook.

In [None]:
# Expose the model base directory to child processes as $MODEL_DIR.
os.environ["MODEL_DIR"] = MODEL_DIR

In [None]:
!(nohup tensorflow_model_server \
  --rest_api_port=8501 \
  --model_name=fashion_model \
  --model_base_path="{MODEL_DIR}") >server.log 2>&1

### Deployment

Open a new terminal, go to this project's folder, and type this:

```bash
sudo docker exec -it jupyter-tf bash
```

Once you're inside the container, launch the server:

```bash
tensorflow_model_server --rest_api_port=8501 --model_name=fashion_model --model_base_path=/tmp
```

In [None]:
for images, labels in ds_train.take(1):
    print(batch.shape)

In [None]:
test_images = batch.numpy()

In [None]:
# Serialize the image batch into the JSON body of the predict request and
# preview its first 80 characters.
data = json.dumps({
    "signature_name": "serving_default",
    "instances": test_images.tolist(),
})
print(f'Data: {data[:80]}...')

In [None]:
# POST the batch to the model server's REST predict endpoint.
headers = {"content-type": "application/json"}
json_response = requests.post(
    'http://localhost:8501/v1/models/fashion_model:predict',
    data=data,
    headers=headers,
    timeout=30,  # fail fast instead of hanging if the server is not reachable
)
# Surface HTTP errors (e.g. model not loaded yet) directly, rather than as a
# confusing KeyError on 'predictions' below.
json_response.raise_for_status()
predictions = json_response.json()['predictions']
# Predicted class index for each image in the batch.
tf.math.argmax(predictions, axis=1)

### Python vs Serving

In [None]:
%%timeit -n10 -r 10
# Baseline: in-process Keras inference on the same batch (10 loops x 10 runs).
model.predict(images)

In [None]:
%%timeit -n10 -r 10
# Same batch sent through the model server's REST endpoint (10 loops x 10 runs).
# The timing covers the HTTP round trip; the JSON body was serialized earlier.
json_response = requests.post(
    'http://localhost:8501/v1/models/fashion_model:predict',
    data=data,
    headers=headers
)