In [25]:
import json
import requests
import grpc

import tensorflow as tf
import numpy as np

from pathlib import Path
from tensorflow_serving.apis.predict_pb2 import PredictRequest
from tensorflow_serving.apis import prediction_service_pb2_grpc

## Prerequisites

* Run the docker-compose file to launch TF serving
* Train the MNIST model (cell below) and make sure it is saved

In [31]:
# Train a small dense classifier on MNIST and export it as a SavedModel.
# This model is the running example that the serving cells query.

# Load MNIST and hold out the first 5000 training samples for validation.
mnist = tf.keras.datasets.mnist.load_data()
(X_train_full, y_train_full), (X_test, y_test) = mnist
X_valid, y_valid = X_train_full[:5000], y_train_full[:5000]
X_train, y_train = X_train_full[5000:], y_train_full[5000:]

# Seed and reset Keras state before building the model.
tf.random.set_seed(42)
tf.keras.backend.clear_session()

# The model accepts raw uint8 28x28 images: flattening and rescaling by 1/255
# are layers of the model itself, so callers do not preprocess their inputs.
model = tf.keras.Sequential(layers=[
    tf.keras.layers.Flatten(input_shape=[28, 28], dtype=tf.uint8),
    tf.keras.layers.Rescaling(scale=1 / 255),
    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.Dense(10, activation="softmax"),
])
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=10,
)

# Export in SavedModel format under <model_name>/<model_version>.
# NOTE(review): presumably this is the directory the docker-compose TF Serving
# container mounts - confirm against the compose file.
model_name = "my_mnist_model"
model_version = "0002"
model_path = Path(model_name, model_version)
model.save(model_path, save_format="tf")



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: my_mnist_model/0002/assets


INFO:tensorflow:Assets written to: my_mnist_model/0002/assets


In [16]:
# Querying TF serving through the REST API
#
# Sends the first n test images to the model's REST predict endpoint and
# compares the predicted classes with the true labels.
# Requires X_test, y_test and model_name from the training cell.

# n example instances for inference
n = 10
X_new = X_test[:n]

# "serving_default" is the signature we want to call.
# A NumPy array is not JSON-serializable, so convert it to nested Python lists.
request_json = json.dumps({
    "signature_name": "serving_default",
    "instances": X_new.tolist(),
})

# Making the request. Build the URL from model_name so it stays in sync with
# the model saved above (and with the gRPC cell, which also uses model_name).
server_url = f"http://localhost:8501/v1/models/{model_name}:predict"
# Always pass a timeout: without one, requests waits indefinitely when the
# server is not reachable, which would hang the notebook.
http_response = requests.post(server_url, data=request_json, timeout=10.0)
http_response.raise_for_status()  # fail loudly on 4xx/5xx instead of parsing an error body
response = http_response.json()

# Parsing the response: one row of class probabilities per instance
y_proba = np.array(response["predictions"])
preds = np.argmax(y_proba, axis=1)
print("Actuals:     ", y_test[:n])
print("Predictions: ", preds)


Actuals:      [7 2 1 0 4 1 4 9 5 9]
Predictions:  [7 2 1 0 4 1 4 9 6 9]


In [30]:
# Querying TF serving through the gRPC API
#
# Sends the same instances as the REST cell (X_new, n) to the gRPC endpoint.
# Requires model and model_name from the training cell and X_new, n, y_test
# from the cells above.

# Create and populate a protocol buffer
request = PredictRequest()
request.model_spec.name = model_name
request.model_spec.signature_name = "serving_default"

# Input layer of the model: flatten_input
input_name = model.input_names[0]

# Populate the input
request.inputs[input_name].CopyFrom(tf.make_tensor_proto(X_new))

# Make the request. Using the channel as a context manager closes it when the
# call is done, so re-running this cell does not leak open channels.
with grpc.insecure_channel("localhost:8500") as channel:
    predict_service = prediction_service_pb2_grpc.PredictionServiceStub(channel)
    response = predict_service.Predict(request, timeout=10.0)

# Parse response

# Output layer name of the model: dense_1
output_name = model.output_names[0]
outputs_proto = response.outputs[output_name]
y_proba = tf.make_ndarray(outputs_proto)

preds = np.argmax(y_proba, axis=1)
print("Actuals:     ", y_test[:n])
print("Predictions: ", preds)

Actuals:      [7 2 1 0 4 1 4 9 5 9]
Predictions:  [7 2 1 0 4 1 4 9 6 9]
