More refactoring to make the client TF agnostic.
stianlp committed Jan 15, 2018
1 parent 714a7f8 commit 6548b34
Showing 15 changed files with 114 additions and 97 deletions.
9 changes: 8 additions & 1 deletion README.md
@@ -70,4 +70,11 @@ MockClient.predict(self, request_data, request_timeout=None):

`client.predict(request_data)`

The mock client predict function simply returns the mock response.


## Develop

### Generate python code from .proto files
`pip install grpcio-tools`
`python -m grpc_tools.protoc -I protos/ --python_out=predict_client/pbs --grpc_python_out=predict_client/pbs protos/*`
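
A quick way to sanity-check the generated stubs (assuming the command above is run from the repository root) is to import them and build a request; the `simple` model name below is illustrative:

```python
# Import check for the generated protobuf/gRPC modules.
from predict_client.pbs import model_pb2, predict_pb2, prediction_service_pb2, tensor_pb2

request = predict_pb2.PredictRequest()
request.model_spec.name = 'simple'  # illustrative model name
print(request)
```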
6 changes: 0 additions & 6 deletions download_model.sh

This file was deleted.

19 changes: 5 additions & 14 deletions example.py
@@ -1,5 +1,4 @@
import logging
import numpy as np

from predict_client.prod_client import ProdClient

@@ -11,20 +10,12 @@

# Make sure you have a model running on localhost:9000
host = 'localhost:9000'
model_name = 'incv4'
model_name = 'simple'
model_version = 1

client = ProdClient(host, model_name, model_version, in_tensor_dtype='DT_UINT8')
client = ProdClient(host, model_name, model_version)

# Mock up some input data, an image with shape 299,299,3
img = np.zeros((299, 299, 3)).astype(int)
req_data = [{'in_tensor_name': 'a', 'in_tensor_dtype': 'DT_INT32', 'data': 2}]

logger.info('Request data shape: ' + str(img.shape))

prediction = client.predict(img, request_timeout=10)

for k in prediction:
    logger.info('Prediction key: ' + str(k) + ', shape: ' + str(prediction[k].shape))

if len(prediction) == 0:
    logger.info('Got empty prediction')
prediction = client.predict(req_data, request_timeout=10)
logger.info('Prediction: {}'.format(prediction))
23 changes: 23 additions & 0 deletions incv4_example.py
@@ -0,0 +1,23 @@
import logging
import numpy as np

from predict_client.prod_client import ProdClient

logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s - %(levelname)s - %(name)s - %(message)s')

# In each file/module, do this to get the module name in the logs
logger = logging.getLogger(__name__)

# Make sure you have the incv4 model running on localhost:9001
host = 'localhost:9001'
model_name = 'incv4'
model_version = 1
img = np.zeros((299, 299, 3)).astype(int)
req_data = [{'in_tensor_name': 'inputs', 'in_tensor_dtype': 'DT_UINT8', 'data': img}]

client = ProdClient(host, model_name, model_version)

prediction = client.predict(req_data, request_timeout=10)
for k in prediction:
    logger.info('Prediction key: {}, shape: {}'.format(k, prediction[k].shape))
25 changes: 5 additions & 20 deletions predict_client/inmemory_client.py
@@ -1,6 +1,5 @@
import tensorflow as tf
import logging
from tensorflow.python.saved_model import signature_constants
from tensorflow.contrib.saved_model.python.saved_model import signature_def_utils


@@ -20,34 +19,20 @@ def __init__(self, model_path):
meta_graph_def_sig = signature_def_utils.get_signature_def_by_key(meta_graph_def, signature_def)

self.input_tensor_info = meta_graph_def_sig.inputs
self.output_tensor_info = meta_graph_def_sig.outputs

self.input_tensor_name = self.input_tensor_info[signature_constants.CLASSIFY_INPUTS].name

print(self.input_tensor_name)
self.logger.error('Input shape: {}'.format(tf.shape(self.input_tensor_name)))

# Mock client only supports one input, named 'inputs', for now
if not self.input_tensor_name:
    raise ValueError('Unable to find input tensor of model.'
                     'Expected signature_constants.CLASSIFY_INPUTS to be only input tensor.')

self.output_tensor_info = meta_graph_def_sig.outputs
self.output_tensor_keys = [k for k in self.output_tensor_info]

# Run all output tensors
if len(self.output_tensor_keys) == 0:
    raise ValueError('Unable to find any output tensors of model.')

self.output_tensor_names = [self.output_tensor_info[k].name for k in self.output_tensor_keys]

def predict(self, request_data, **kwargs):

self.logger.info('Sending request to inmemory model')
self.logger.info('Model path: ' + str(self.model_path))

self.logger.debug('Running tensors: ' + str(self.output_tensor_keys))

feed_dict = {self.input_tensor_name: request_data}
feed_dict = dict()
for d in request_data:
    input_tensor_name = self.input_tensor_info[d['in_tensor_name']].name
    feed_dict[input_tensor_name] = d['data']

results = self.sess.run(self.output_tensor_names, feed_dict=feed_dict)

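The in-memory client now takes the same list-of-dicts request format as the other clients; each entry names the input tensor and carries its data, and no dtype is needed because the data is fed straight into the session. A minimal usage sketch, assuming the class is exported as `InMemoryClient` and that a SavedModel exists at the given path:

```python
import numpy as np

from predict_client.inmemory_client import InMemoryClient  # assumed class name

# Path and tensor name are illustrative; they must match your exported model.
client = InMemoryClient('saved_models/incv4')

img = np.zeros((299, 299, 3)).astype(int)
req_data = [{'in_tensor_name': 'inputs', 'data': img}]

prediction = client.predict(req_data)
print(prediction)
```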
6 changes: 6 additions & 0 deletions predict_client/mock_client.py
@@ -0,0 +1,6 @@
class MockClient:
    def __init__(self, mock_response):
        self.mock_response = mock_response

    def predict(self, request_data, **kwargs):
        return self.mock_response
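
The mock client is now a tiny TensorFlow-free module: it ignores the request and returns whatever response it was constructed with, which keeps tests fast. A minimal sketch (the canned response is illustrative):

```python
from predict_client.mock_client import MockClient

mock_response = {'scores': [0.1, 0.9]}  # any canned payload works
client = MockClient(mock_response)

# predict ignores the request data and just echoes the canned response
assert client.predict([{'in_tensor_name': 'inputs', 'data': 42}]) == mock_response
```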
18 changes: 14 additions & 4 deletions predict_client/pbs/model_pb2.py


4 changes: 2 additions & 2 deletions predict_client/pbs/predict_pb2.py


2 changes: 1 addition & 1 deletion predict_client/pbs/prediction_service_pb2.py


6 changes: 3 additions & 3 deletions predict_client/pbs/tensor_pb2.py


55 changes: 23 additions & 32 deletions predict_client/prod_client.py
@@ -4,73 +4,64 @@
from grpc import RpcError
from predict_client.pbs.prediction_service_pb2 import PredictionServiceStub
from predict_client.pbs.predict_pb2 import PredictRequest
from predict_client.util import result_to_dict, make_tensor_proto
from predict_client.util import predict_response_to_dict, make_tensor_proto


class ProdClient:
def __init__(self, host, model_name, model_version, in_tensor_dtype):
def __init__(self, host, model_name, model_version):

self.logger = logging.getLogger(self.__class__.__name__)

self.host = host
self.model_name = model_name
self.model_version = model_version

self.in_tensor_dtype = in_tensor_dtype

def predict(self, request_data, request_timeout=10):

self.logger.info('Sending request to tfserving model')
self.logger.info('Host: ' + str(self.host))
self.logger.info('Model name: ' + str(self.model_name))
self.logger.info('Model version: ' + str(self.model_version))

t = time.time()
self.logger.debug('Request data shape: ' + str(request_data.shape))
# tensor_proto = tf.contrib.util.make_tensor_proto(request_data, dtype=self.in_tensor_dtype,
# shape=request_data.shape)
self.logger.info('Host: {}'.format(self.host))
self.logger.info('Model name: {}'.format(self.model_name))
self.logger.info('Model version: {}'.format(self.model_version))

# tensor_proto = tf.contrib.util.make_tensor_proto(request_data)

self.logger.debug('Making tensor proto took: ' + str(time.time() - t))
# self.logger.debug('Request data shape: {}'.format(request_data.shape))

# Create gRPC client and request
t = time.time()
channel = grpc.insecure_channel(self.host)
self.logger.debug('Establishing insecure channel took: ' + str(time.time() - t))
self.logger.debug('Establishing insecure channel took: {}'.format(time.time() - t))

t = time.time()
stub = PredictionServiceStub(channel)
self.logger.debug('Creating stub took: ' + str(time.time() - t))
self.logger.debug('Creating stub took: {}'.format(time.time() - t))

t = time.time()
request = PredictRequest()
self.logger.debug('Creating request object took: ' + str(time.time() - t))
self.logger.debug('Creating request object took: {}'.format(time.time() - t))

request.model_spec.name = self.model_name

# if self.model_version > 0:
# request.model_spec.version.value = self.model_version

# print(tensor_proto)
# request.inputs['inputs'].CopyFrom(tensor_proto)
if self.model_version > 0:
    request.model_spec.version.value = self.model_version

tensor_proto = make_tensor_proto(request_data, self.in_tensor_dtype)
t = time.time()
for d in request_data:
    tensor_proto = make_tensor_proto(d['data'], d['in_tensor_dtype'])
    request.inputs[d['in_tensor_name']].CopyFrom(tensor_proto)

request.inputs['inputs'].CopyFrom(tensor_proto)
self.logger.debug('Making tensor protos took: {}'.format(time.time() - t))

try:
t = time.time()
result = stub.Predict(request, timeout=request_timeout)
predict_response = stub.Predict(request, timeout=request_timeout)

self.logger.debug('Actual request took: ' + str(time.time() - t))
self.logger.info('Got result')
self.logger.debug('Actual request took: {} seconds'.format(time.time() - t))

result_dict = result_to_dict(result)
predict_response_dict = predict_response_to_dict(predict_response)

keys = [k for k in result_dict]
self.logger.info('Got result with keys: ' + str(keys))
keys = [k for k in predict_response_dict]
self.logger.info('Got predict_response with keys: {}'.format(keys))

return result_dict
return predict_response_dict

except RpcError as e:
self.logger.error(e)
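Since the request data is now a list of per-tensor dicts, one predict call can feed several named inputs, each with its own dtype, and the client no longer needs a fixed `in_tensor_dtype`. A hedged sketch (the second input tensor `b` is hypothetical and assumes a model that accepts it):

```python
from predict_client.prod_client import ProdClient

client = ProdClient('localhost:9000', 'simple', 1)

# One dict per input tensor; the dtype travels with the data.
req_data = [
    {'in_tensor_name': 'a', 'in_tensor_dtype': 'DT_INT32', 'data': 2},
    {'in_tensor_name': 'b', 'in_tensor_dtype': 'DT_INT32', 'data': 3},  # hypothetical second input
]

prediction = client.predict(req_data, request_timeout=10)
print(prediction)
```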
32 changes: 21 additions & 11 deletions predict_client/util.py
@@ -68,24 +68,27 @@ def run_concurrent_requests(request_data, clients):
return list(map(lambda j: j.value, jobs))


def result_to_dict(result):
def predict_response_to_dict(predict_response):
predict_response_dict = dict()

result_dict = dict()

for k in result.outputs:
shape = [x.size for x in result.outputs[k].tensor_shape.dim]
for k in predict_response.outputs:
shape = [x.size for x in predict_response.outputs[k].tensor_shape.dim]

logger.debug('Key: ' + k + ', shape: ' + str(shape))

dtype_constant = result.outputs[k].dtype
dtype_constant = predict_response.outputs[k].dtype

if dtype_constant not in number_to_dtype_value:
    logger.error('Tensor output data type not supported. Returning empty dict.')
    result_dict[k] = 'value not found'
    predict_response_dict[k] = 'value not found'

result_dict[k] = np.array(eval('result.outputs[k].' + number_to_dtype_value[dtype_constant])).reshape(shape)
if shape == [1]:
    predict_response_dict[k] = eval('predict_response.outputs[k].' + number_to_dtype_value[dtype_constant])[0]
else:
    predict_response_dict[k] = np.array(
        eval('predict_response.outputs[k].' + number_to_dtype_value[dtype_constant])).reshape(shape)

return result_dict
return predict_response_dict


def make_tensor_proto(data, dtype):
@@ -94,12 +97,19 @@ def make_tensor_proto(data, dtype):
if type(dtype) is str:
dtype = dtype_to_number[dtype]

dim = [{'size': 1}]
values = [data]

if hasattr(data, 'shape'):
    dim = [{'size': dim} for dim in data.shape]
    values = list(data.reshape(-1))

tensor_proto_dict = {
    'dtype': dtype,
    'tensor_shape': {
        'dim': [{'size': dim} for dim in data.shape]
        'dim': dim
    },
    'int_val': list(data.reshape(-1))
    'int_val': values
}

dict_to_protobuf(tensor_proto_dict, tensor_proto)
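`make_tensor_proto` no longer assumes the data has a `.shape`: a plain Python scalar becomes a single-element tensor with one size-1 dimension, while anything with a shape keeps its dimensions and is flattened into the value list. A quick sketch of both branches (dtype string as used in the examples; note that only integer dtypes go through `int_val` here):

```python
import numpy as np

from predict_client.util import make_tensor_proto

# Scalar input: one size-1 dimension, int_val == [2]
print(make_tensor_proto(2, 'DT_INT32'))

# Array input: dims and flattened values come from the array itself
print(make_tensor_proto(np.zeros((2, 3), dtype=int), 'DT_INT32'))
```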
4 changes: 2 additions & 2 deletions protos/model.proto
@@ -3,7 +3,7 @@ syntax = "proto3";
package tensorflow.serving;
option cc_enable_arenas = true;

//import "google/protobuf/wrappers.proto";
import "google/protobuf/wrappers.proto";

// Metadata for an inference request such as the model name and version.
message ModelSpec {
@@ -14,5 +14,5 @@ message ModelSpec {
// Typically not needed unless coordinating across multiple models that were
// co-trained and/or have inter-dependencies on the versions used at inference
// time.
//google.protobuf.Int64Value version = 2;
google.protobuf.Int64Value version = 2;
}
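Re-enabling the wrapped `version` field is what lets `prod_client.py` pin a specific model version on the request (the `Int64Value` wrapper is why `version.value` is set rather than `version`). A minimal sketch with the generated stubs; the model name and version are illustrative:

```python
from predict_client.pbs.predict_pb2 import PredictRequest

request = PredictRequest()
request.model_spec.name = 'simple'
# Leave unset to let the serving default (typically the latest version) apply.
request.model_spec.version.value = 1
```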
2 changes: 1 addition & 1 deletion start_models.sh → run_incv4_model.sh
@@ -1,7 +1,7 @@
#!/usr/bin/env bash

# Run a simple inception model with bottleneck output layer
docker run -it -p 9000:9000 epigramai/model-server:incv4
docker run -it -p 9001:9000 epigramai/model-server:incv4

# To stop and remove container
# docker stop incv4 && docker rm incv4
Binary file removed test_data/cat.jpg
