## TensorFlow Serving with Docker

* [Dataset](https://www.kaggle.com/snap/amazon-fine-food-reviews/data)
* Using Google's [model](https://tfhub.dev/google/tf2-preview/nnlm-en-dim128/1) from TensorFlow hub - token based text embedding trained on English Google News 200B corpus. [paper](https://www.jmlr.org/papers/volume3/bengio03a/bengio03a.pdf)
* Exporting model as Protobuf
* Deploying with Docker and serving over gRPC and REST APIs. [tensorflow serving](https://hub.docker.com/r/tensorflow/serving)

In [1]:
import os
import time
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub

## Data Review

In [2]:
# Read the whole train.csv so the following cells can inspect its shape, schema and Score distribution.
df = pd.read_csv('../../github/python-data-science/data/docker/train.csv')

In [3]:
df.shape

(284227, 10)

In [4]:
df.head(2)

Unnamed: 0,Id,ProductId,UserId,ProfileName,HelpfulnessNumerator,HelpfulnessDenominator,Score,Time,Summary,Text
0,184502,B001BCVY4W,A1JMR1N9NBYJ1X,Mad Ethyl Flint,0,0,4,1228176000,Doesn't look like catfood!,"When you first open the can, it looks like som..."
1,182779,B0052LZ6XI,A2CVFBDRXYFZG9,vanostran,0,0,5,1335657600,Solid Mayo,This is a solid mayo. Will not disappoint. At ...


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284227 entries, 0 to 284226
Data columns (total 10 columns):
 #   Column                  Non-Null Count   Dtype 
---  ------                  --------------   ----- 
 0   Id                      284227 non-null  int64 
 1   ProductId               284227 non-null  object
 2   UserId                  284227 non-null  object
 3   ProfileName             284216 non-null  object
 4   HelpfulnessNumerator    284227 non-null  int64 
 5   HelpfulnessDenominator  284227 non-null  int64 
 6   Score                   284227 non-null  int64 
 7   Time                    284227 non-null  int64 
 8   Summary                 284213 non-null  object
 9   Text                    284227 non-null  object
dtypes: int64(5), object(5)
memory usage: 21.7+ MB


In [6]:
df.columns

Index(['Id', 'ProductId', 'UserId', 'ProfileName', 'HelpfulnessNumerator',
       'HelpfulnessDenominator', 'Score', 'Time', 'Summary', 'Text'],
      dtype='object')

In [7]:
df['Score'].value_counts()

5    181679
4     40075
1     26369
3     21305
2     14799
Name: Score, dtype: int64

## Pipeline for loading data

In [8]:
def load_data(file_path, num_samples):
    """Load review texts and one-hot sentiment labels from a reviews CSV.

    Reads the first `num_samples` rows of `file_path`, keeping only the columns
    at positions 6 and 9 (Score and Text in the train.csv schema shown by
    df.info() in this notebook).

    Parameters
    ----------
    file_path : str
        Path of the CSV file to read.
    num_samples : int
        Maximum number of rows to read (forwarded to `pd.read_csv(nrows=...)`).

    Returns
    -------
    text : np.ndarray of shape (n,), dtype=object
        Each review as ASCII bytes; non-ASCII characters are replaced by '?'.
    labels : np.ndarray of shape (n, 3), dtype=int
        One-hot sentiment with the fixed column order
        [negative (score <= 2), neutral (score == 3), positive (score >= 4)].
    """
    df = pd.read_csv(file_path, usecols=[6, 9], nrows=num_samples)
    df.columns = ['rating', 'title']

    text = [str(t).encode(encoding='ascii', errors='replace') for t in df['title'].tolist()]
    text = np.array(text, dtype=object)

    sentiment = [1 if score >= 4 else 0 if score == 3 else -1 for score in df['rating'].tolist()]
    # Pin the category set: a plain get_dummies() only emits columns for the
    # classes that happen to occur, so a small or skewed sample would produce
    # fewer than three label columns and no longer match the 3-unit output layer.
    sentiment = pd.Categorical(sentiment, categories=[-1, 0, 1])
    labels = np.array(pd.get_dummies(sentiment), dtype=int)
    return text, labels

In [9]:
# Locations of the train/test CSVs, relative to the notebook's working directory.
# These names are also bound as the default TRAIN_FILE / VAL_FILE arguments of train_model().
train_data = '../../github/python-data-science/data/docker/train.csv'
test_data  = '../../github/python-data-science/data/docker/test.csv'

In [10]:
# Try the loader on the first 100 training rows; the next cells inspect the result.
temp_text, temp_lab = load_data(train_data, 100)

In [11]:
temp_lab[0:4]

array([[0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1]])

In [12]:
temp_text.shape

(100,)

### Google's model from TensorFlow hub

In [28]:
## https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1
## https://tfhub.dev/google/tf2-preview/nnlm-en-dim128/1

# Model 1
# def get_model():
#     hub_layer = hub.KerasLayer("https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1", output_shape=[128],
#                                input_shape=[], dtype=tf.string, name='input', trainable=False)
#     model = tf.keras.Sequential()
#     model.add(hub_layer)
#     model.add(tf.keras.layers.Dense(16, activation='relu'))
#     model.add(tf.keras.layers.Dense(3, activation='softmax', name='output'))
#     model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])
#     model.summary()
#     return model

# Model 2
def get_model():
    """Build and compile the 3-class review sentiment classifier.

    The network is a frozen (non-trainable) 128-dimensional nnlm text embedding
    from TensorFlow Hub that takes raw strings, followed by a 64-unit ReLU layer
    and a 3-way softmax layer named 'output'. It is compiled with categorical
    cross-entropy, the Adam optimizer and an accuracy metric.

    Prints the model summary as a side effect and returns the compiled model.
    """
    embedding = hub.KerasLayer(
        "https://tfhub.dev/google/tf2-preview/nnlm-en-dim128/1",
        output_shape=[128],
        input_shape=[],
        dtype=tf.string,
        name='input',
        trainable=False,
    )
    classifier = tf.keras.Sequential([
        embedding,
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(3, activation='softmax', name='output'),
    ])
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer='Adam',
        metrics=['accuracy'],
    )
    classifier.summary()
    return classifier

In [29]:
get_model()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (KerasLayer)          (None, 128)               124642688 
                                                                 
 dense_2 (Dense)             (None, 64)                8256      
                                                                 
 output (Dense)              (None, 3)                 195       
                                                                 
Total params: 124,651,139
Trainable params: 8,451
Non-trainable params: 124,642,688
_________________________________________________________________


<keras.engine.sequential.Sequential at 0x2972da35670>

In [30]:
# Load the 50-dimensional nnlm module directly (note: get_model() uses the dim128 variant)
# and embed two sample sentences; the last expression displays the first sentence's vector.
embed = hub.load('https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1')
embeddings = embed(['this is a test', 'look at the embeddings'])
embeddings[0]

<tf.Tensor: shape=(50,), dtype=float32, numpy=
array([ 0.05650096,  0.2567145 ,  0.24404189,  0.14395264, -0.05569138,
       -0.10513686,  0.09544804,  0.3080969 , -0.218672  , -0.03048538,
       -0.19036277,  0.01005417,  0.11541115, -0.14860378,  0.03914931,
       -0.2561884 , -0.15442336,  0.12836292,  0.0469152 , -0.1500514 ,
       -0.13068351, -0.01958708,  0.09192695,  0.1208052 , -0.12291992,
       -0.04548305, -0.3679261 ,  0.05125156,  0.09797382, -0.10217863,
       -0.1965521 ,  0.15523128, -0.05881735, -0.16426983,  0.06646369,
        0.05789638,  0.15421619, -0.24014738,  0.11075415, -0.10756174,
       -0.01679449, -0.01877424,  0.18602087,  0.2623015 , -0.3829217 ,
       -0.34895867, -0.0868978 ,  0.02295742,  0.03787762, -0.02646483],
      dtype=float32)>

### Train & Export Model

In [31]:
def train_model(EPOCHS=5, BATCH_SIZE=32, TRAIN_FILE=train_data, VAL_FILE=test_data,
                NUM_TRAIN=100000, NUM_VAL=10000):
    """Train the sentiment classifier and checkpoint the best epoch.

    Parameters
    ----------
    EPOCHS : int
        Number of training epochs.
    BATCH_SIZE : int
        Mini-batch size passed to `model.fit`.
    TRAIN_FILE, VAL_FILE : str
        CSV files read through `load_data` for training and validation.
    NUM_TRAIN, NUM_VAL : int
        Maximum number of rows read from each file (previously hard-coded
        to 100000 and 10000).

    Returns
    -------
    The fitted Keras model as it stands after the final epoch. The copy with
    the lowest validation loss is saved to '<cwd>/model_checkpoint'.
    """
    print('Loading train/test data...')
    x_train, y_train = load_data(TRAIN_FILE, NUM_TRAIN)
    x_val, y_val = load_data(VAL_FILE, NUM_VAL)

    print('Training model...')
    model = get_model()
    checkpoint_path = os.path.join(os.getcwd(), 'model_checkpoint')
    # The original passed `save_best_model=` and `model=`, which are not
    # ModelCheckpoint arguments: depending on the Keras version they are either
    # silently ignored (a checkpoint is written every epoch) or rejected.
    # The intended options are `save_best_only` and `mode`.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                    monitor='val_loss', verbose=0,
                                                    save_best_only=True,
                                                    save_weights_only=False,
                                                    mode='auto')
    model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=1,
              validation_data=(x_val, y_val),
              callbacks=[checkpoint])
    return model

In [32]:
def export_model(model, base_path='model_review/'):
    """Save `model` as a SavedModel in a timestamped subdirectory of `base_path`.

    The subdirectory name is the current Unix time truncated to whole seconds,
    so each export lands in its own numerically named folder (presumably the
    model version directory TensorFlow Serving picks up - confirm against the
    serving configuration).
    """
    version_dir = str(int(time.time()))
    export_dir = os.path.join(base_path, version_dir)
    tf.saved_model.save(model, export_dir)

In [33]:
if __name__ == '__main__':
    # Train with the default settings, then export the result for serving.
    # `model` stays bound at notebook scope; the "Test Model" cells below call model.predict on it.
    model = train_model()
    export_model(model)

Loading train/test data...
Training model...
Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (KerasLayer)          (None, 128)               124642688 
                                                                 
 dense_3 (Dense)             (None, 64)                8256      
                                                                 
 output (Dense)              (None, 3)                 195       
                                                                 
Total params: 124,651,139
Trainable params: 8,451
Non-trainable params: 124,642,688
_________________________________________________________________
Epoch 1/5


INFO:tensorflow:Assets written to: C:\Users\uditg\Documents\Python Scripts\Lessons\model_checkpoint\assets


Epoch 2/5


INFO:tensorflow:Assets written to: C:\Users\uditg\Documents\Python Scripts\Lessons\model_checkpoint\assets


Epoch 3/5


INFO:tensorflow:Assets written to: C:\Users\uditg\Documents\Python Scripts\Lessons\model_checkpoint\assets


Epoch 4/5


INFO:tensorflow:Assets written to: C:\Users\uditg\Documents\Python Scripts\Lessons\model_checkpoint\assets


Epoch 5/5


INFO:tensorflow:Assets written to: C:\Users\uditg\Documents\Python Scripts\Lessons\model_checkpoint\assets


INFO:tensorflow:Assets written to: model_review/1648794896\assets


INFO:tensorflow:Assets written to: model_review/1648794896\assets


### Test Model

In [34]:
model.predict(['Bad book, waste of time, do not buy'])
# model 1 - array([[0.6144746 , 0.05491435, 0.33061108]], dtype=float32)
# model 2 - array([[0.9178998 , 0.01348544, 0.06861478]], dtype=float32)

array([[0.9178998 , 0.01348544, 0.06861478]], dtype=float32)

In [35]:
model.predict(['Awesome product'])
# model 1 - array([[0.01603218, 0.01620099, 0.9677669 ]], dtype=float32)
# model 2 - array([[0.01482655, 0.00417724, 0.98099625]], dtype=float32)

array([[0.01482655, 0.00417724, 0.98099625]], dtype=float32)

## Model Deployment

### Docker

In [38]:
# docker pull tensorflow/serving

# docker run -p 8500:8500 -p 8501:8501 --mount type=bind,source="C:/users/uditg/documents/python scripts/lessons/"model_review\,target=/models/model_review -e MODEL_NAME=model_review -t tensorflow/serving

# Model manager behind tensorflow/serving automatically unloads the old model and loads the new model

### Endpoints - REST or gRPC
TensorFlow Serving supports:
- Remote Procedure Call (gRPC) - default port 8500
- Representational State Transfer (REST) - default port 8501
- Consistent API structures. Server supports both standards simultaneously

### gRPC vs. REST
- REST is easy to implement and debug
- gRPC is more network-efficient, uses smaller payloads, and can provide much faster inference

---

### Using REST url

- Standard HTTP POST requests
- Response is a JSON body with the prediction
- Request from the default or specific model  
Default (latest) model version: `http://{HOST}:{PORT}/v1/models/{MODEL_NAME}:predict`  
Specific model versions: `http://{HOST}:{PORT}/v1/models/{MODEL_NAME}[/versions/{MODEL_VERSION}]:predict`

In [40]:
import json
import requests
import sys

In [41]:
def get_rest_url(model_name, host='127.0.0.1', port='8501', verb='predict', version=None):
    """Build the TensorFlow Serving REST endpoint URL for a model.

    Parameters
    ----------
    model_name : str
        Name the model is served under.
    host, port : str
        Address of the REST endpoint.
    verb : str
        Action appended after the colon (e.g. 'predict').
    version : optional
        When given (and truthy), a '/versions/<version>' segment is inserted so
        the request targets that specific model version.

    Returns
    -------
    str
        'http://<host>:<port>/v1/models/<model_name>[/versions/<version>]:<verb>'
    """
    url = "http://{host}:{port}/v1/models/{model_name}".format(host=host, port=port, model_name=model_name)
    if version:
        # The leading slash was missing, which glued 'versions' onto the model
        # name (".../models/model_reviewversions/1:predict").
        url += '/versions/{version}'.format(version=version)
    url += ':{verb}'.format(verb=verb)
    return url

In [42]:
def get_model_prediction(model_input, model_name='model_review', signature_name='serving_default',
                         timeout=10.0):
    """Request a prediction for one input from the REST endpoint.

    Parameters
    ----------
    model_input
        A single model input (here: one review string). It is wrapped in the
        row-format "instances" list of the request body.
    model_name : str
        Name of the served model; used to build the URL via `get_rest_url`.
    signature_name : str
        Serving signature to invoke. It is now sent in the request body; the
        original accepted this argument but never used it.
    timeout : float
        Seconds to wait for the server before `requests` raises a timeout,
        so a dead endpoint cannot hang the notebook indefinitely.

    Returns
    -------
    The value of the 'predictions' key of the JSON response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    url = get_rest_url(model_name)

    # In the row format, inputs are keyed to the "instances" key of the JSON request.
    # With a single named input, the instance value is the input value itself.
    data = {"signature_name": signature_name, "instances": [model_input]}

    rv = requests.post(url, data=json.dumps(data), timeout=timeout)
    # raise_for_status() is already a no-op for successful responses.
    rv.raise_for_status()

    return rv.json()['predictions']

In [43]:
if __name__ == '__main__':
    # Interactive REST client: read reviews from stdin until ':q' is entered
    # and print the served model's prediction for each one.

    print("\nGenerate REST url ...")
    url = get_rest_url(model_name='model_review')
    print(url)

    while True:
        print("\nEnter a product review [:q for Quit]")
        # The former `sys.version_info[0] <= 3` guard did not select anything
        # and left `sentence` unbound whenever it was false, so read unconditionally.
        sentence = input()
        if sentence == ':q':
            break
        model_input = sentence
        model_prediction = get_model_prediction(model_input)
        print("The model predicted ...")
        print(model_prediction)


Generate REST url ...
http://127.0.0.1:8501/v1/models/model_review:predict

Enter a product review [:q for Quit]
'Bad book, waste of time, do not buy'
The model predicted ...
[[0.876614928, 0.0399460234, 0.0834390745]]

Enter a product review [:q for Quit]
Awesome product
The model predicted ...
[[0.0148265501, 0.00417723553, 0.980996251]]

Enter a product review [:q for Quit]
Bad book, waste of time, do not buy
The model predicted ...
[[0.917899787, 0.013485441, 0.0686147809]]

Enter a product review [:q for Quit]
:q


---

### Using gRPC client

More sophisticated client-server connections
- Prediction data has to be converted to the Protobuf format
- Request fields have designated types, e.g. float, int, bytes
- Payloads need to be converted to base64
- Connect to the server via gRPC stubs

In [None]:
# !pip install tensorflow-serving-api

In [26]:
import sys
import grpc
from grpc.beta import implementations
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2, get_model_metadata_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc

In [44]:
def get_stub(host='127.0.0.1', port='8500'):
    """Open an insecure gRPC channel and return a PredictionService stub for it.

    Parameters
    ----------
    host, port : str
        Address of the gRPC endpoint. These arguments were previously ignored
        in favour of a hard-coded '127.0.0.1:8500'; the defaults keep that target.
    """
    target = '{host}:{port}'.format(host=host, port=port)
    channel = grpc.insecure_channel(target)
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
    return stub

def get_model_prediction(model_input, stub, model_name='model_review', signature_name='serving_default',
                         input_name='input_input', output_name='output', timeout=5.0):
    """Request a prediction over gRPC (proof of concept, no error handling).

    Note: this redefines the REST helper of the same name from earlier in the
    notebook; once this cell has run, the REST version is shadowed.

    Parameters
    ----------
    model_input
        Value converted with `tf.make_tensor_proto` (the caller below passes a
        one-element list containing the review string).
    stub
        PredictionService stub, e.g. from `get_stub()`.
    model_name, signature_name : str
        Model and serving signature to address.
    input_name, output_name : str
        Keys of the request input tensor and the response output tensor.
        Defaults are the previously hard-coded 'input_input' and 'output'.
    timeout : float
        Seconds allowed for the call (previously hard-coded to 5.0).

    Returns
    -------
    The `float_val` field of the selected output tensor (printed as a flat
    list of floats in the sample output below).
    """
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.signature_name = signature_name
    request.inputs[input_name].CopyFrom(tf.make_tensor_proto(model_input))
    response = stub.Predict.future(request, timeout)
    return response.result().outputs[output_name].float_val

def get_model_version(model_name, stub):
    """Return the version value reported by the server for `model_name`.

    Sends a GetModelMetadata request (asking for the 'signature_def' field)
    with a 10 second timeout and returns `response.model_spec.version.value`.

    Parameters
    ----------
    model_name : str
        Name of the served model. Previously ignored in favour of a
        hard-coded 'model_review'.
    stub
        PredictionService stub, e.g. from `get_stub()`.
    """
    request = get_model_metadata_pb2.GetModelMetadataRequest()
    request.model_spec.name = model_name
    request.metadata_field.append("signature_def")
    response = stub.GetModelMetadata(request, 10)
    # signature of loaded model is available here: response.metadata['signature_def']
    return response.model_spec.version.value

if __name__ == '__main__':
    # Interactive gRPC client: read reviews from stdin until ':q' is entered
    # and print the served model's prediction for each one.
    print("\nCreate RPC connection ...")
    stub = get_stub()
    while True:
        print("\nEnter a product review [:q for Quit]")
        # The outer `sys.version_info[0] <= 3` guard added nothing and left
        # `sentence` unbound whenever it was false; only the Python 2 / 3
        # choice of input function is kept.
        sentence = raw_input() if sys.version_info[0] < 3 else input()
        if sentence == ':q':
            break
        model_input = [sentence]
        model_prediction = get_model_prediction(model_input, stub)
        print("The model predicted ...")
        print(model_prediction)


Create RPC connection ...

Enter a product review [:q for Quit]
Bad book, waste of time, do not buy
The model predicted ...
[0.9178997874259949, 0.013485440984368324, 0.06861478090286255]

Enter a product review [:q for Quit]
Awesome product
The model predicted ...
[0.014826550148427486, 0.004177235532552004, 0.9809962511062622]

Enter a product review [:q for Quit]
:q
