Merge pull request #135 from fprost/serving_for_tfrecords

Adding serving function for tf_records with unknown batch size.
conversationai · Jul 19, 2018 · 361457c · 361457c
2 parents 413b6f3 + 8782eba
commit 361457c
Show file tree

Hide file tree

Showing 5 changed files with 49 additions and 17 deletions.
diff --git a/experiments/tf_trainer/common/serving_input.py b/experiments/tf_trainer/common/serving_input.py
@@ -7,9 +7,16 @@
 import tensorflow as tf
 from tensorflow.python.ops import array_ops
 
+FLAGS = tf.app.flags.FLAGS
+
+tf.app.flags.DEFINE_string("serving_format", "TFRECORDS",
+                           "Format of inputs in inference. "
+                           "Can be either JSON or TFRECORDS.")
+
+
 def create_serving_input_fn(feature_preprocessor_init, text_feature_name, key_name):
 
-  def serving_input_fn():
+  def serving_input_fn_json():
     features_placeholders = {}
     features_placeholders[text_feature_name] = array_ops.placeholder(
         dtype=tf.string, name=text_feature_name)
@@ -26,4 +33,32 @@ def serving_input_fn():
         features,
         features_placeholders)
 
-  return serving_input_fn
+  def serving_input_fn_tfrecords():  # Serving input fn fed with serialized tf.Example strings.
+    serialized_example = tf.placeholder(
+        shape=[None],  # 1-D with unknown length, so any batch size is accepted
+        dtype=tf.string,
+        name="input_example_tensor"
+    )
+    feature_spec = {
+        text_feature_name: tf.FixedLenFeature([], dtype=tf.string),  # one string per example
+        key_name: tf.FixedLenFeature([], dtype=tf.int64)  # one int64 key per example
+    }
+
+    features = tf.parse_example(  # parses the whole batch of serialized examples at once
+        serialized_example, feature_spec)
+    feature_preprocessor = feature_preprocessor_init()
+    features[text_feature_name] = feature_preprocessor(  # replace raw text with its preprocessed form
+        features[text_feature_name])
+
+    return tf.estimator.export.ServingInputReceiver(
+        features,
+        serialized_example)  # receiver tensor: the raw serialized-example placeholder
+
+  if FLAGS.serving_format == 'TFRECORDS':
+    return serving_input_fn_tfrecords
+  elif FLAGS.serving_format == 'JSON':
+    return serving_input_fn_json
+  else:
+    raise ValueError('Serving format not implemented.'
+        ' Should be one of ["JSON", "TFRECORDS"].'
+        )
diff --git a/experiments/tf_trainer/common/text_preprocessor.py b/experiments/tf_trainer/common/text_preprocessor.py
@@ -54,23 +54,20 @@ def _tokenize_tensor_op(text: types.Tensor) -> types.Tensor:
       '''Converts a string Tensor to an array of integers.
 
       Args:
-        text: must be a scalar string tensor (rank 0).
+        text: must be a 1-D string Tensor.
 
       Returns:
-        A 1-D Tensor of word integers.
+        A 2-D Tensor of word integers.
       '''
 
       # TODO: Improve tokenizer.
       # TODO: Ensure utf-8 encoding. Currently the string is parsed with default encoding (unclear). 
-      words = tf.string_split([text])
+      words = tf.string_split(text)
       words_int_sparse = vocabulary_table.lookup(words)
-      words_int_dense = tf.sparse_to_dense(
-          words_int_sparse.indices,
-          words_int_sparse.dense_shape,
-          words_int_sparse.values,
+      words_int_dense = tf.sparse_tensor_to_dense(
+          words_int_sparse,
           default_value=0)
-
-      return tf.squeeze(words_int_dense)
+      return words_int_dense
 
     return _tokenize_tensor_op
 

diff --git a/experiments/tf_trainer/common/tfrecord_input.py b/experiments/tf_trainer/common/tfrecord_input.py
@@ -87,7 +87,9 @@ def _read_tf_example(self,
 
     text = parsed[self._text_feature]
     # I think this could be a feature column, but feature columns seem so beta.
-    preprocessed_text = feature_preprocessor(text)
+    expanded_text = tf.expand_dims(text, 0)
+    preprocessed_text = tf.squeeze(
+        feature_preprocessor(expanded_text))
     features = {self._text_feature: preprocessed_text}
     if self._round_labels:
       labels = {label: tf.round(parsed[label]) for label in self._labels}

diff --git a/experiments/tf_trainer/keras_gru_attention/run.deploy.sh b/experiments/tf_trainer/keras_gru_attention/run.deploy.sh
@@ -4,8 +4,7 @@
 # Edit these!
 MODEL_NAME=keras_gru_attention
 # By default, the model is the last one from the user.
-MODEL_SAVED_PATH_FOLDER=$(gsutil ls gs://kaggle-model-experiments/tf_trainer_runs/${USER}/${MODEL_NAME}/)
-MODEL_SAVED_PATH=${MODEL_SAVED_PATH_FOLDER}model_dir
+MODEL_SAVED_PATH=$(gsutil ls gs://kaggle-model-experiments/tf_trainer_runs/${USER}/${MODEL_NAME}/ | tail -1)
 
 # Create a new model.
 # Will raise an error if the model already exists.
@@ -17,5 +16,4 @@ MODEL_VERSION=v_$(date +"%Y%m%d_%H%M%S")
 gcloud ml-engine versions create $MODEL_VERSION \
   --model $MODEL_NAME \
   --origin $MODEL_SAVED_PATH \
-  --runtime-version 1.8
-
+  --runtime-version 1.8 
diff --git a/experiments/tf_trainer/keras_gru_attention/run.py b/experiments/tf_trainer/keras_gru_attention/run.py
@@ -32,7 +32,7 @@
                            "required for serving.")
 tf.app.flags.DEFINE_integer("batch_size", 64,
                             "The batch size to use during training.")
-tf.app.flags.DEFINE_integer("train_steps", 100,
+tf.app.flags.DEFINE_integer("train_steps", 1000,
                             "The number of steps to train for.")
 tf.app.flags.DEFINE_integer("eval_period", 50,
                             "The number of steps per eval period.")