# Imports

In [86]:
# math and data
import numpy as np
import pandas as pd
import math

# time
import time

# functools
from functools import partial

# visualization and plotting
from IPython import display
import matplotlib as mpl
import matplotlib.pyplot as plt

# to plot nice figures
%matplotlib inline
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# smooth animations
import matplotlib.animation as animation
mpl.rc('animation', html='jshtml')

# handle files
import os
import sys
import joblib

# hash table classes
from collections import Counter

# output
from tqdm.notebook import tqdm
from tqdm.notebook import trange

# deep learning
import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds
import tensorflow_addons as tfa
import tensorflow_hub as hub

# reinforcement learning
import gym

# Setup

In [109]:
# Root folder for every SavedModel exported by this notebook.
MODEL_DIR = os.path.join(".", "_tf_saved_models", "18_deploy")

# Export layout: <MODEL_DIR>/<model_name>/<model_version>
model_name, model_version = "my_mnist_model", "0001"
model_path = os.path.join(MODEL_DIR, model_name, model_version)

print(model_path)

./_tf_saved_models/18_deploy/my_mnist_model/0001


# Learning

Let's learn how to train and deploy models at scale (following chapter 19 of handsonml).

## TF Serving

Let's learn to use TF Serving to deploy our models as a queryable microservice.

### Train Model

Let's train a quick mnist model.

In [5]:
# The first half of the "test" split is used as the validation set and the
# second half as the test set; as_supervised=True yields (image, label) pairs.
train_set_raw, valid_set_raw, test_set_raw = tfds.load(
    "mnist",
    split=["train", "test[:50%]", "test[50%:]"],
    as_supervised=True,
)

In [90]:
def random_crop(X):
    """
    Randomly crops a single image, then resizes it back to its original size.

    A random factor in [0.9, 1.0) is drawn once and applied to both the
    height and the width, so the crop window keeps the image's aspect ratio.

    Parameters
    ----------
    X : tensor of rank 3, laid out as (height, width, channels).

    Returns
    -------
    Tensor with the same height and width as `X` and all of its channels.
    """
    shape = tf.shape(X)
    dims_factor = tf.random.uniform([], 0.9, 1.0, dtype=tf.float32)

    # The crop size has to be integral: truncate explicitly instead of
    # relying on an implicit float -> int conversion of the size list.
    crop_height = tf.cast(dims_factor * tf.cast(shape[0], tf.float32), tf.int32)
    crop_width = tf.cast(dims_factor * tf.cast(shape[1], tf.float32), tf.int32)

    # Keep every channel of the input rather than assuming a single one.
    crop_size = tf.stack([crop_height, crop_width, shape[2]])

    X_cropped = tf.image.random_crop(X, crop_size)
    return tf.image.resize(X_cropped, shape[:2])

In [91]:
def preprocess(X_batch, y_batch, augment=False):
    """
    Casts a batch of images to float32 and divides it by 255.

    When `augment` is truthy, `random_crop` is additionally applied to each
    image of the batch. The labels are returned untouched.
    """
    scaled = tf.cast(X_batch, tf.float32) / 255.
    if not augment:
        return scaled, y_batch
    return tf.map_fn(random_crop, scaled), y_batch

In [196]:
batch_size = 32

# Only the training pipeline is shuffled and augmented.
train_set = (
    train_set_raw
    .shuffle(60000)
    .batch(batch_size)
    .map(partial(preprocess, augment=True))
    .prefetch(1)
)

# Validation and test pipelines only batch and rescale.
valid_set = (
    valid_set_raw
    .batch(batch_size)
    .map(preprocess)
    .prefetch(1)
)
test_set = (
    test_set_raw
    .batch(batch_size)
    .map(preprocess)
    .prefetch(1)
)

In [93]:
def visualize_digits(X_batch, n_cols=6):
    """
    Plots every image of a batch on a grid, one subplot per image.

    The number of rows is derived from the batch size, so batches of any
    size fit on the grid (a fixed 6x6 grid fails beyond 36 images).

    Parameters
    ----------
    X_batch : indexable batch of images supporting `len()`; each
        `X_batch[i]` is handed to `plt.imshow`.
    n_cols : number of grid columns (default 6).
    """
    n_digits = len(X_batch)
    # At least one row so that an empty batch still yields a valid figure.
    n_rows = max(1, math.ceil(n_digits / n_cols))

    # figsize is (width, height): width follows columns, height follows rows.
    plt.figure(figsize=(2 * n_cols, 2 * n_rows))
    for index in range(n_digits):
        plt.subplot(n_rows, n_cols, index + 1)
        plt.imshow(X_batch[index], cmap="binary")
        plt.axis("off")
    plt.show()

In [28]:
# First model version: one strided convolution followed by a small dense head.
model = keras.models.Sequential()
model.add(keras.layers.InputLayer(input_shape=[28, 28, 1]))
model.add(keras.layers.Conv2D(filters=16, kernel_size=5, strides=2,
                              padding="same", activation="relu"))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(128, activation="relu"))
model.add(keras.layers.Dropout(0.2))
# 10 output units with softmax: one probability per class.
model.add(keras.layers.Dense(10, activation="softmax"))

In [106]:
# NOTE(review): the execution counts (model built at In [28], compiled at
# In [106]) suggest this cell was re-run after earlier training rounds;
# confirm that a fresh kernel with learning_rate=5e-6 and a single epoch
# reproduces the accuracy reported below.
optimizer = keras.optimizers.Nadam(learning_rate=5e-6)
model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [107]:
# One pass over the augmented training pipeline, validated on valid_set.
model.fit(train_set, validation_data=valid_set, epochs=1)



<keras.callbacks.History at 0x7fcc6c433be0>

In [108]:
# Evaluate on the held-out test pipeline; given the compile settings the
# displayed list is presumably [loss, accuracy].
model.evaluate(test_set)



[0.02744220197200775, 0.9901999831199646]

### Export SavedModel

Now that we've trained an MNIST model with 99% accuracy on the test set, let's export it to the `SavedModel` format.

In [110]:
# Export the trained model in SavedModel format into the directory model_path.
tf.saved_model.save(model, model_path)



INFO:tensorflow:Assets written to: ./_tf_saved_models/18_deploy/my_mnist_model/0001/assets


INFO:tensorflow:Assets written to: ./_tf_saved_models/18_deploy/my_mnist_model/0001/assets


Run the below docker command to create a docker container using the tensorflow/serving image and binding our model directory to the mountpoint used by the container.

```bash
docker run -it --rm -p 8500:8500 -p 8501:8501 \
    -v "$HOME/ml/handsonml/_tf_saved_models/18_deploy/my_mnist_model:/models/my_mnist_model" \
    -e MODEL_NAME=my_mnist_model \
    tensorflow/serving
```

### Query TF Serving via Rest API

Let's create our json used to query the model.

In [141]:
# Pull the first (images, labels) batch out of the test pipeline to use as
# the query sent to the served model.
X_new, y_new = next(iter(test_set.take(1)))

In [142]:
# Convert the image batch to a NumPy array. np.asarray accepts both a tensor
# and an array, so re-running this cell is harmless (calling .numpy() a
# second time would fail because ndarrays have no such method).
X_new = np.asarray(X_new)

Create json string.

In [118]:
import json

# Request body for the REST API: the signature to call plus the image batch
# converted to nested Python lists so that it can be serialized as JSON.
input_data_json = json.dumps(
    dict(signature_name="serving_default", instances=X_new.tolist())
)

The json string is very long.

In [128]:
# Show only the first and last 100 characters of the request body.
print(input_data_json[:100], "\n\n...\n\n", input_data_json[-100:])

{"signature_name": "serving_default", "instances": [[[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0 

...

 [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]]]]}


In [131]:
# The payload is a plain string; its length is the number of characters sent.
type(input_data_json), len(input_data_json)

(str, 248981)

Now let's send an HTTP POST request to our container.

In [132]:
import requests

# REST endpoint of the container started with the docker command above
# (port 8501 is published by that command).
SERVER_URL = "http://localhost:8501/v1/models/my_mnist_model:predict"

# Without a timeout, requests waits indefinitely when the server does not
# answer; use the same 10 second budget as the gRPC call in this notebook.
response = requests.post(SERVER_URL, data=input_data_json, timeout=10.0)
response.raise_for_status()  # raise an exception on HTTP error status codes
response_json = response.json()

In [136]:
# The "predictions" entry of the response holds one list of class
# probabilities per instance that was sent.
y_probas = np.array(response_json["predictions"])
y_probas.shape

(32, 10)

In [138]:
# Predicted class = index of the highest probability in each row.
y_pred = np.argmax(y_probas, axis=1)

In [144]:
# Element-wise comparison of the predicted classes with the true labels.
y_pred == y_new

<tf.Tensor: shape=(32,), dtype=bool, numpy=
array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True])>

Nice!

### Query TF Serving via gRPC API

The REST API serializes NumPy arrays to JSON text and parses them back, so transferring large arrays this way incurs high latency and bandwidth usage.

Use gRPC which sends and receives protobufs (serialized binary format to efficiently send or store lots of data).

Let's use the `tensorflow-serving-api` library to create these protobufs for request.

In [165]:
from tensorflow_serving.apis.predict_pb2 import PredictRequest

# Key under which the input tensor is stored in the request.
# NOTE(review): assumes the serving signature's input is named after the
# model's first input; confirm against the exported signature.
input_name = model.input_names[0]

# Address the request to the served model and its default signature.
request = PredictRequest()
request.model_spec.name = model_name
request.model_spec.signature_name = "serving_default"

# Attach the image batch as a tensor protobuf.
X_new_proto = tf.make_tensor_proto(X_new)
request.inputs[input_name].CopyFrom(X_new_proto)

Now let's send this `PredictRequest` object to the server and get its response.

In [167]:
import grpc
from tensorflow_serving.apis import prediction_service_pb2_grpc

# Unencrypted channel to the gRPC port (8500) published by the docker command.
channel = grpc.insecure_channel("localhost:8500")
# Client stub used to call the prediction service over that channel.
predict_service = prediction_service_pb2_grpc.PredictionServiceStub(channel)

Use the service to send request and get a `PredictResponse` object back.

In [168]:
# Send the PredictRequest, waiting at most 10 seconds for the response.
response = predict_service.Predict(request, timeout=10.0)

Convert response to tensor.

In [191]:
# Look up the response entry named after the model's first output, then
# convert that tensor protobuf to a NumPy array.
output_name = model.output_names[0] # name of the model's first output
output_proto = response.outputs[output_name] # a tensorflow.core.framework.tensor_pb2.TensorProto
y_probas = tf.make_ndarray(output_proto)

In [188]:
# Compare the classes predicted through gRPC with the true labels.
np.argmax(y_probas, axis=1) == y_new

<tf.Tensor: shape=(32,), dtype=bool, numpy=
array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True])>

### Deploying New Model Version

Let's create a new version of the model.

In [237]:
# Second model version: two conv / max-pool / batch-norm stages followed by
# a two-layer dense head with dropout.
model = keras.models.Sequential()
model.add(keras.layers.InputLayer(input_shape=[28, 28, 1]))

# Convolutional stage 1
model.add(keras.layers.Conv2D(filters=32, kernel_size=5,
                              padding="same", activation="elu"))
model.add(keras.layers.MaxPooling2D())
model.add(keras.layers.BatchNormalization())

# Convolutional stage 2
model.add(keras.layers.Conv2D(filters=64, kernel_size=3,
                              padding="same", activation="elu"))
model.add(keras.layers.MaxPooling2D())
model.add(keras.layers.BatchNormalization())

# Dense head
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(512, activation="selu"))
model.add(keras.layers.Dropout(0.25))
model.add(keras.layers.Dense(128, activation="selu"))
model.add(keras.layers.Dropout(0.25))
model.add(keras.layers.Dense(10, activation="softmax"))

In [None]:
optimizer = keras.optimizers.Nadam(learning_rate=5e-4)
model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Learning-rate schedule callback configured with factor=0.25 and patience=2
# (the monitored quantity is left at the Keras default).
lr_plateau_cb = keras.callbacks.ReduceLROnPlateau(patience=2, factor=0.25)

model.fit(
    train_set,
    validation_data=valid_set,
    epochs=10,
    callbacks=[lr_plateau_cb],
)

In [198]:
# Evaluate the second model on the test pipeline; given the compile
# settings the displayed list is presumably [loss, accuracy].
model.evaluate(test_set)



[0.02542022615671158, 0.993399977684021]

Saving the second version of the model with a 99.3% accuracy on test set - using a deeper CNN with MaxPooling2D and deeper FC layers.

In [199]:
# Same model name as before, new version directory next to "0001".
model_name, model_version = "my_mnist_model", "0002"
model_path = os.path.join(MODEL_DIR, model_name, model_version)
model_path

'./_tf_saved_models/18_deploy/my_mnist_model/0002'

In [200]:
# Export the second model in SavedModel format into the directory model_path.
tf.saved_model.save(model, model_path)



INFO:tensorflow:Assets written to: ./_tf_saved_models/18_deploy/my_mnist_model/0002/assets


INFO:tensorflow:Assets written to: ./_tf_saved_models/18_deploy/my_mnist_model/0002/assets


### Deploy to GCP

- Create project
- Enable billing
- Create bucket on GCS
    - Upload model file to bucket
- Create model on AI Platform
    - Create version for model
- Create service account for project
    - Give AI Platform developer permissions
    - Create and download JSON key

Now we can query our model using the service account!

In [213]:
# !pip install google-api-python-client

In [222]:
# Store the location of the service-account JSON key (relative to the
# current working directory) in GOOGLE_APPLICATION_CREDENTIALS.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.path.join(
    ".", "gcp_service_keys", "deploy_first_model_service_account_key.json"
)

Now let's create a resource object that wraps access to our model.

In [225]:
import googleapiclient.discovery

project_id = "deploy-first-model"
model_id = "my_mnist_model"
version_id = "v0001"

# This is an API resource name, not a filesystem path: always join it with
# "/" (os.path.join would insert backslashes on Windows).
model_path = "/".join(["projects", project_id,
                       "models", model_id,
                       "versions", version_id])
model_path

'projects/deploy-first-model/models/my_mnist_model/versions/v0001'

You will get the error below if you did not set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable.
>DefaultCredentialsError: Could not automatically determine credentials. Please set GOOGLE_APPLICATION_CREDENTIALS or explicitly create credentials and re-run the application. For more information, please see https://cloud.google.com/docs/authentication/getting-started

In [226]:
# Build a client for version "v1" of the "ml" service and keep its
# projects() resource, whose predict() method is used to query the model.
ml_resource = googleapiclient.discovery.build(serviceName="ml", version="v1").projects()

Now let's actually implement using the resource object to call the prediction service and get predictions back.

In [232]:
# Key under which each prediction stores its probabilities in the response.
# NOTE(review): 'dense_9' is hard-coded and presumably tied to the layer
# names of the session that exported the deployed model; verify it against
# the deployed model's serving_default signature.
output_name = 'dense_9' # from metagraph serving_default

In [233]:
def predict_from_gcp(X):
    """
    Queries the model deployed on GCP and returns its predictions.

    X has to be a numpy array with shape (batch_size, 28, 28, 1),
    scaled from 0 to 1 with dtype float32.

    Returns a numpy array holding, for every instance, the value stored
    under `output_name` in the corresponding prediction.

    Raises RuntimeError when the response contains an "error" entry.
    """
    body = {"signature_name": "serving_default",
            "instances": X.tolist()}
    response = ml_resource.predict(name=model_path, body=body).execute()

    # Errors are reported inside the response body.
    if "error" in response:
        raise RuntimeError(response["error"])

    predictions = response["predictions"]
    return np.array([prediction[output_name] for prediction in predictions])

In [234]:
# Query the GCP-hosted model with the same batch used for TF Serving.
y_probas = predict_from_gcp(X_new)

In [235]:
# Compare the classes predicted by the GCP-hosted model with the true labels.
np.argmax(y_probas, axis=1) == y_new

<tf.Tensor: shape=(32,), dtype=bool, numpy=
array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True])>