<h2 align="center"> Deploy Models with TensorFlow Serving and Docker</h2>

### Task 2: Load and Preprocess Data

In [9]:
%%writefile -a train.py
import os
import time
import pandas as pd
import numpy as np

import tensorflow as tf
import tensorflow_hub as hub

Writing train.py


In [6]:
#Souce: https://www.kaggle.com/snap/amazon-fine-food-reviews/data
!head -n 2 train.csv

'head' is not recognized as an internal or external command,
operable program or batch file.


In [10]:
%%writefile -a train.py

def ratings_mapper(rating):
    if rating >= 4:
        return 1
    elif rating == 3:
        return 0
    else:
        return -1

def load_dataset(file_path, num_rows):
    df = pd.read_csv(file_path, usecols=[6, 9], nrows=num_rows)
    df.columns = ['rating', 'title']
    
    # separate features from labels
    X = df['title'].astype(str).str.encode('ascii', 'replace').to_numpy(dtype=object)
    y = df['rating'].apply(ratings_mapper)
    
    labels = np.array(pd.get_dummies(y), dtype=int)
    return labels, X

Appending to train.py


In [8]:
tmp_labels, tmp_text = load_dataset('train.csv', 100)

tmp_labels

NameError: name 'load_dataset' is not defined

### Task 3: Build the Classification Model using TF Hub

In [11]:
%%writefile -a train.py
## https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1
## https://tfhub.dev/google/tf2-preview/nnlm-en-dim128/1

def get_model():
    hub_layer = hub.KerasLayer('https://tfhub.dev/google/tf2-preview/nnlm-en-dim128/1',
                              output_shape=[128], input_shape=[], dtype=tf.string, name='input', trainable=False)
    
    model = tf.keras.Sequential()
    model.add(hub_layer)
    model.add(tf.keras.layers.Dense(64, activation='relu'))
    model.add(tf.keras.layers.Dense(3, activation='softmax', name='output'))
    model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])
    model.summary()
    return model


Appending to train.py


In [None]:
embed = hub.load("https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1")
embeddings = embed(['this is a test', 'look at the embeddings', 'greatest of all time'])
embeddings

### Task 4: Define Training Procedure

In [12]:
%%writefile -a train.py

def train(epochs=5, batch_size=32, train_file='train.csv', val_file='test.csv'):
    workdir = os.getcwd()
    print("Loading training/validation data...")
    
    y_train, X_train = load_dataset(train_file, num_rows=1e5)
    y_val, X_val = load_dataset(val_file, num_rows=1e4)
    
    print("Training the model...")
    model = get_model()
    model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, 
              validation_data=(X_val, y_val), 
              callbacks=[tf.keras.callbacks.ModelCheckpoint(os.path.join(workdir, 'model_checkpoint'),
                                                            monitor='val_loss', verbose=1, save_best_model=True,
                                                            save_weights_only=False, mode='auto')])
    return model

Appending to train.py


### Task 5: Train and Export Model as Protobuf

In [13]:
%%writefile -a train.py

def export_model(model, base_path='amazon_review'):
    workdir = os.getcwd()
    time_ = str(int(time.time()))
    path = os.path.join(workdir, base_path, time_)
    tf.saved_model.save(model, path)
    
if __name__ == '__main__':
    model = train()
    export_model(model)

Appending to train.py


### Task 6: Test Model

#### Negative Review:

In [None]:
test_sentence = "horrible book, waste of time"
model.predict([test_sentence])

#### Positive Review:

In [None]:
test_sentence = "Awesome product."
model.predict([test_sentence])

### Task 7: TensorFlow Serving with Docker

`docker pull tensorflow/serving`

`docker run -p 8500:8500 \
            -p 8501:8501 \
            --mount type=bind,\
            source=amazon_review/,\
            target=/models/amazon_review \
            -e MODEL_NAME=amazon_review \
            -t tensorflow/serving`

### Task 8: Setup a REST Client to Perform Model Predictions

#### Perform Model Prediction

##### Support for gRPC and REST

- TensorFlow Serving supports
    - Remote Procedure Protocal (gRPC)
    - Representational State Transfer (REST)
- Consistent API structures
- Server supports both standards simultaneously
- Default ports:
    - RPC: 8500
    - REST: 8501

#### Predictions via REST

- Standard HTTP POST requests
- Response is a JSON body with the prediction
- Request from the default or specific model

Default URI scheme:

`http://{HOST}:{PORT}/v1/models/{MODEL_NAME}`

Specific model versions:

`http://{HOST}:{PORT}/v1/models/{MODEL_NAME}[/versions/{MODEL_VERSION}]:predict`

In [None]:
%%writefile tf_serving_rest_client.py
import json
import requests
import sys

def get_rest_url(model_name, host='127.0.0.1', port='8501', verb='predict', version=None):
    """ generate the URL path"""
    url = "http://{host}:{port}/v1/models/{model_name}".format(host=host, port=port, model_name=model_name)
    if version:
        url += 'versions/{version}'.format(version=version)
    url += ':{verb}'.format(verb=verb)
    return url


def get_model_prediction(model_input, model_name='amazon_review', signature_name='serving_default'):
    """ no error handling at all, just poc"""

    url = get_rest_url(model_name)
    #In the row format, inputs are keyed to instances key in the JSON request.
    #When there is only one named input, specify the value of instances key to be the value of the input:
    data = {"instances": [model_input]}
    
    rv = requests.post(url, data=json.dumps(data))
    if rv.status_code != requests.codes.ok:
        rv.raise_for_status()
    
    return rv.json()['predictions']

if __name__ == '__main__':

    print("\nGenerate REST url ...")
    url = get_rest_url(model_name='amazon_review')
    print(url)
    
    while True:
        print("\nEnter an Amazon review [:q for Quit]")
        if sys.version_info[0] <= 3:
            sentence = input()
        if sentence == ':q':
            break
        model_input = sentence
        model_prediction = get_model_prediction(model_input)
        print("The model predicted ...")
        print(model_prediction)

### Task 9: Setup a gRPC Client to Perform Model Predictions

Modified from [https://github.com/tensorflow/serving/blob/master/tensorflow_serving/example/mnist_client.py](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/example/mnist_client.py#L152)

#### Predictions via gRPC

More sophisticated client-server connections

- Prediction data has to be converted to the Protobuf format
- Request types have designated types, e.g. float, int, bytes
- Payloads need to be converted to base64
- Connect to the server via gRPC stubs

#### gRPC vs REST: When to use which API standard

- Rest is easy to implement and debug
- RPC is more network efficient, smaller payloads
- RPC can provide much faster inferences!

In [1]:
%%writefile tf_serving_grpc_client.py
import sys
import grpc
from grpc.beta import implementations
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2, get_model_metadata_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc


def get_stub(host='127.0.0.1', port='8500'):
    channel = grpc.insecure_channel('127.0.0.1:8500') 
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
    return stub


def get_model_prediction(model_input, stub, model_name='amazon_review', signature_name='serving_default'):
    """ no error handling at all, just poc"""
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.signature_name = signature_name
    request.inputs['input_input'].CopyFrom(tf.make_tensor_proto(model_input))
    response = stub.Predict.future(request, 5.0)  # 5 seconds
    return response.result().outputs["output"].float_val


def get_model_version(model_name, stub):
    request = get_model_metadata_pb2.GetModelMetadataRequest()
    request.model_spec.name = 'amazon_review'
    request.metadata_field.append("signature_def")
    response = stub.GetModelMetadata(request, 10)
    # signature of loaded model is available here: response.metadata['signature_def']
    return response.model_spec.version.value

if __name__ == '__main__':
    print("\nCreate RPC connection ...")
    stub = get_stub()
    while True:
        print("\nEnter an Amazon review [:q for Quit]")
        if sys.version_info[0] <= 3:
            sentence = raw_input() if sys.version_info[0] < 3 else input()
        if sentence == ':q':
            break
        model_input = [sentence]
        model_prediction = get_model_prediction(model_input, stub)
        print("The model predicted ...")
        print(model_prediction)

Overwriting tf_serving_grpc_client.py


In [None]:
!saved_model_cli show --dir /home/cicada/Downloads/rhyme/TF_Serving/amazon_review/1597906549 --all