# Start your TensorFlow Serving instance

In [14]:
# Reference only: this cell is a bare string literal, so the shell commands
# inside it are displayed as text and never executed. Run them from a terminal
# (or as real `!` cells) to start the server or inspect the SavedModel.
"""
!docker run -p 8500:8500 -p 8501:8501 \
--mount type=bind,source=/tmp,target=/models/movie \
-e MODEL_NAME=movie -t tensorflow/serving

You can check your model with 

!saved_model_cli show --dir /path/to/the/model --tag_set serve --signature_def serving_default

"""

'\n!docker run -p 8500:8500 -p 8501:8501 --mount type=bind,source=/tmp,target=/models/movie -e MODEL_NAME=movie -t tensorflow/serving\n'

# Some Test Data

In [30]:
# Sample movie plot summaries used as model input by the request helpers in
# this notebook (both read TEXTS[0]).
# The trailing comma after the first string is load-bearing: it keeps TEXTS a
# one-element tuple, so TEXTS[0] is the whole summary rather than its first
# character.
TEXTS = ("""Quantum of Solace continues the adventures of James Bond after Casino Royale. Betrayed by Vesper, the woman he loved, 007 fights the urge to make his latest mission personal. Pursuing his determination to uncover the truth, 
                    Bond and M interrogate Mr. White, who reveals that the organization that blackmailed Vesper is far more complex and dangerous than anyone had imagined.""",
# Second sample, currently disabled:
#          """Monty Python and the Holy Grail loosely follows the legend of King Arthur. Arthur along with his squire, Patsy, recruits his
#     Knights of the Round Table, including Sir Bedevere the Wise, Sir Lancelot the Brave, Sir Robin the
#     Not-Quite-So-Brave-As-Sir-Lancelot and Sir Galahad the Pure. On the way Arthur battles the Black Knight who, despite having had
#     all his limbs chopped off, insists he can still fight. They reach Camelot, but Arthur decides not to enter, as "it is a silly
#     place"."""
)

# REST Prediction Example

In [31]:
import requests
import json

In [32]:
def rest_request(text=None, timeout=10.25):
    """Send one prediction request to TensorFlow Serving's REST endpoint.

    Args:
        text: Input string to classify. Defaults to ``TEXTS[0]``.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. The default mirrors the 10.25 s deadline used by
            the gRPC example in this notebook.

    Returns:
        The ``requests.Response`` from the server; call ``.json()`` on it to
        read the predictions.
    """
    url = 'http://localhost:8501/v1/models/movie:predict'
    if text is None:
        text = TEXTS[0]
    payload = json.dumps({"instances": [text]})
    # Without an explicit timeout, requests waits indefinitely, so an
    # unreachable or stalled server would hang the notebook cell.
    return requests.post(url, data=payload, timeout=timeout)

In [35]:
%%timeit -n 1

# Timed statement: each loop sends one REST predict request and waits for the response.
rs_rest = rest_request()

The slowest run took 55.64 times longer than the fastest. This could mean that an intermediate result is being cached.
63.6 ms ± 134 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [36]:
# Issue the request once more outside the timing cell so the response can be inspected.
rs_rest = rest_request()
# Decode the JSON body; as the cell's last expression it is shown as the output.
rs_rest.json()

{'predictions': [{'classes': ['0', '1', '2', '3', '4', '5', '6', '7', '8'],
   'scores': [0.401,
    0.0962357,
    0.262375,
    0.392996,
    0.0621801,
    0.0822788,
    0.296542,
    0.0819803,
    0.567294]}]}

# gRPC Predict Example

In [23]:
import grpc
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc
import tensorflow as tf

In [27]:
def grpc_request(text=None, hostport='localhost:8500', timeout=10.25):
    """Start an asynchronous Predict call against TensorFlow Serving over gRPC.

    Args:
        text: Input string to classify. Defaults to ``TEXTS[0]``.
        hostport: ``host:port`` of the serving gRPC endpoint.
        timeout: Deadline for the RPC, in seconds.

    Returns:
        The gRPC future for the in-flight call. The call is NOT awaited here;
        use ``.result()`` on the returned future to block for the response.
    """
    if text is None:
        text = TEXTS[0]

    # NOTE: the channel is deliberately left open -- the returned future still
    # depends on it, and closing a channel terminates its active RPCs.
    channel = grpc.insecure_channel(hostport)
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'movie'
    request.model_spec.signature_name = 'serving_default'

    # tf.contrib was removed in TensorFlow 2.x; tf.make_tensor_proto is the
    # same helper under a name that exists in both 1.x and 2.x.
    request.inputs['inputs'].CopyFrom(tf.make_tensor_proto(text, shape=[1, 1]))
    return stub.Predict.future(request, timeout)

In [28]:
%%timeit -n 1
# NOTE: grpc_request() returns a future without waiting on it, so this times
# only dispatching the call, not the server round trip. Time
# `grpc_request().result()` for a figure comparable to the REST benchmark.
rs_grpc = grpc_request()

1.52 ms ± 413 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [29]:
# Start a fresh call, then block on its future; the value returned by
# .result() is the cell's last expression and is shown as the output.
a = grpc_request()
a.result()

outputs {
  key: "classes"
  value {
    dtype: DT_STRING
    tensor_shape {
      dim {
        size: 1
      }
      dim {
        size: 9
      }
    }
    string_val: "0"
    string_val: "1"
    string_val: "2"
    string_val: "3"
    string_val: "4"
    string_val: "5"
    string_val: "6"
    string_val: "7"
    string_val: "8"
  }
}
outputs {
  key: "scores"
  value {
    dtype: DT_FLOAT
    tensor_shape {
      dim {
        size: 1
      }
      dim {
        size: 9
      }
    }
    float_val: 0.4009998142719269
    float_val: 0.0962357223033905
    float_val: 0.2623746991157532
    float_val: 0.3929961621761322
    float_val: 0.06218007206916809
    float_val: 0.08227881789207458
    float_val: 0.2965419888496399
    float_val: 0.08198034763336182
    float_val: 0.5672940611839294
  }
}
model_spec {
  name: "movie"
  version {
    value: 1556583584
  }
  signature_name: "serving_default"
}