Skip to content

Commit

Permalink
Merge pull request #135 from fprost/serving_for_tfrecords
Browse files Browse the repository at this point in the history
Adding serving function for tf_records with unknown batch size.
  • Loading branch information
fprost committed Jul 19, 2018
2 parents 413b6f3 + 8782eba commit 361457c
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 17 deletions.
39 changes: 37 additions & 2 deletions experiments/tf_trainer/common/serving_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,16 @@
import tensorflow as tf
from tensorflow.python.ops import array_ops

# Shared flag registry; values are read through FLAGS.<flag_name>.
FLAGS = tf.app.flags.FLAGS

# Selects which serving input function create_serving_input_fn returns.
# The help text is built from adjacent string literals, so the first literal
# needs a trailing space to keep the two sentences from running together.
tf.app.flags.DEFINE_string(
    "serving_format", "TFRECORDS",
    "Format of inputs in inference. "
    "Can be either JSON or TFRECORDS.")


def create_serving_input_fn(feature_preprocessor_init, text_feature_name, key_name):

def serving_input_fn():
def serving_input_fn_json():
features_placeholders = {}
features_placeholders[text_feature_name] = array_ops.placeholder(
dtype=tf.string, name=text_feature_name)
Expand All @@ -26,4 +33,32 @@ def serving_input_fn():
features,
features_placeholders)

return serving_input_fn
def serving_input_fn_tfrecords():
    '''Builds the serving input receiver for serialized tf.Example inputs.

    The receiver tensor is a 1-D string placeholder named
    "input_example_tensor" whose length is left unspecified, so the batch
    size does not have to be known when the graph is built.

    Returns:
      A tf.estimator.export.ServingInputReceiver whose features are the
      parsed examples (with the text feature run through the preprocessor)
      and whose receiver tensor is the serialized-example placeholder.
    '''
    example_batch = tf.placeholder(
        dtype=tf.string, shape=[None], name="input_example_tensor")

    # Both features are parsed as scalars per example: the text as a string
    # and the key as an int64.
    parsing_spec = {}
    parsing_spec[text_feature_name] = tf.FixedLenFeature([], dtype=tf.string)
    parsing_spec[key_name] = tf.FixedLenFeature([], dtype=tf.int64)

    parsed = tf.parse_example(example_batch, parsing_spec)

    # The preprocessor is created here, after parsing, and replaces the raw
    # text feature in place; the key feature is passed through untouched.
    preprocess = feature_preprocessor_init()
    parsed[text_feature_name] = preprocess(parsed[text_feature_name])

    return tf.estimator.export.ServingInputReceiver(parsed, example_batch)

# Hand back the serving input function that matches the serving_format flag.
if FLAGS.serving_format == 'TFRECORDS':
return serving_input_fn_tfrecords
elif FLAGS.serving_format == 'JSON':
return serving_input_fn_json
else:
# Any other flag value is rejected with an error instead of falling back
# to one of the two supported formats.
raise ValueError('Serving format not implemented.'
' Should be one of ["JSON", "TFRECORDS"].'
)
15 changes: 6 additions & 9 deletions experiments/tf_trainer/common/text_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,23 +54,20 @@ def _tokenize_tensor_op(text: types.Tensor) -> types.Tensor:
'''Converts a string Tensor to an array of integers.
Args:
text: must be a scalar string tensor (rank 0).
text: must be a 1-D string Tensor.
Returns:
A 1-D Tensor of word integers.
A 2-D Tensor of word integers.
'''

# TODO: Improve tokenizer.
# TODO: Ensure utf-8 encoding. Currently the string is parsed with default encoding (unclear).
words = tf.string_split([text])
words = tf.string_split(text)
words_int_sparse = vocabulary_table.lookup(words)
words_int_dense = tf.sparse_to_dense(
words_int_sparse.indices,
words_int_sparse.dense_shape,
words_int_sparse.values,
words_int_dense = tf.sparse_tensor_to_dense(
words_int_sparse,
default_value=0)

return tf.squeeze(words_int_dense)
return words_int_dense

return _tokenize_tensor_op

Expand Down
4 changes: 3 additions & 1 deletion experiments/tf_trainer/common/tfrecord_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,9 @@ def _read_tf_example(self,

text = parsed[self._text_feature]
# I think this could be a feature column, but feature columns seem so beta.
preprocessed_text = feature_preprocessor(text)
expanded_text = tf.expand_dims(text, 0)
preprocessed_text = tf.squeeze(
feature_preprocessor(expanded_text))
features = {self._text_feature: preprocessed_text}
if self._round_labels:
labels = {label: tf.round(parsed[label]) for label in self._labels}
Expand Down
6 changes: 2 additions & 4 deletions experiments/tf_trainer/keras_gru_attention/run.deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
# Edit these!
MODEL_NAME=keras_gru_attention
# By default, the model is the last one from the user.
MODEL_SAVED_PATH_FOLDER=$(gsutil ls gs://kaggle-model-experiments/tf_trainer_runs/${USER}/${MODEL_NAME}/)
MODEL_SAVED_PATH=${MODEL_SAVED_PATH_FOLDER}model_dir
MODEL_SAVED_PATH=$(gsutil ls gs://kaggle-model-experiments/tf_trainer_runs/${USER}/${MODEL_NAME}/ | tail -1)

# Create a new model.
# Will raise an error if the model already exists.
Expand All @@ -17,5 +16,4 @@ MODEL_VERSION=v_$(date +"%Y%m%d_%H%M%S")
gcloud ml-engine versions create $MODEL_VERSION \
--model $MODEL_NAME \
--origin $MODEL_SAVED_PATH \
--runtime-version 1.8

--runtime-version 1.8
2 changes: 1 addition & 1 deletion experiments/tf_trainer/keras_gru_attention/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
"required for serving.")
tf.app.flags.DEFINE_integer("batch_size", 64,
"The batch size to use during training.")
tf.app.flags.DEFINE_integer("train_steps", 100,
tf.app.flags.DEFINE_integer("train_steps", 1000,
"The number of steps to train for.")
tf.app.flags.DEFINE_integer("eval_period", 50,
"The number of steps per eval period.")
Expand Down

0 comments on commit 361457c

Please sign in to comment.