From eb77e7b9bcda31752b129993e44bb034cec8c5ae Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Thu, 28 Feb 2019 18:02:05 -0500 Subject: [PATCH 01/25] mnist basic t2t model --- examples/mnist/implementations/models/t2t.py | 27 +++++++++++++++++++ examples/mnist/requirements.txt | 3 ++- examples/mnist/resources/models.yaml | 13 ++++----- .../mnist/resources/transformed_columns.yaml | 8 ++++++ examples/reviews/resources/models.yaml | 15 +++++++++++ pkg/workloads/tf_train/train_util.py | 25 +++++++++++++---- 6 files changed, 77 insertions(+), 14 deletions(-) create mode 100644 examples/mnist/implementations/models/t2t.py diff --git a/examples/mnist/implementations/models/t2t.py b/examples/mnist/implementations/models/t2t.py new file mode 100644 index 0000000000..4c5f8f87f8 --- /dev/null +++ b/examples/mnist/implementations/models/t2t.py @@ -0,0 +1,27 @@ +import tensorflow as tf +from tensor2tensor.utils import trainer_lib +from tensor2tensor import models # pylint: disable=unused-import +from tensor2tensor import problems # pylint: disable=unused-import +from tensor2tensor.data_generators import problem_hparams +from tensor2tensor.utils import registry + + +def create_estimator(run_config, model_config): + hparams = trainer_lib.create_hparams("basic_fc_small") + run_config.data_parallelism = None + run_config.t2t_device_info = {"num_async_replicas": 1} + problem = registry.problem("image_mnist") + problem.eval_metrics = lambda: [] + p_hparams = problem.get_hparams(hparams) + hparams.problem = problem + hparams.problem_hparams = p_hparams + hparams.warm_start_from = None + + estimator = trainer_lib.create_estimator("basic_fc_relu", hparams, run_config) + return estimator + + +def transform_tensors(features, labels=None): + features["inputs"] = tf.reshape(features["inputs"], [28, 28, 1]) + features["targets"] = tf.expand_dims(labels, -1) + return features, labels diff --git a/examples/mnist/requirements.txt b/examples/mnist/requirements.txt index 1c8587a200..cfbf63288b 100644 --- a/examples/mnist/requirements.txt +++ b/examples/mnist/requirements.txt @@ -1 +1,2 @@ -pillow==5.4.1 \ No newline at end of file +pillow==5.4.1 +tensor2tensor==1.10.0 diff --git a/examples/mnist/resources/models.yaml b/examples/mnist/resources/models.yaml index ab8a95ddc5..b7b4f255d4 100644 --- a/examples/mnist/resources/models.yaml +++ b/examples/mnist/resources/models.yaml @@ -39,18 +39,15 @@ batch_size: 64 num_epochs: 5 + - kind: model - name: dnn - path: implementations/models/dnn.py + name: t2t + path: implementations/models/t2t.py type: classification target_column: label feature_columns: - - image_pixels - hparams: - learning_rate: 0.01 - input_shape: [28, 28, 1] - output_shape: [10] - hidden_units: [100, 200] + - inputs + data_partition_ratio: training: 0.7 evaluation: 0.3 diff --git a/examples/mnist/resources/transformed_columns.yaml b/examples/mnist/resources/transformed_columns.yaml index f46f08aba4..b2b707c5a0 100644 --- a/examples/mnist/resources/transformed_columns.yaml +++ b/examples/mnist/resources/transformed_columns.yaml @@ -4,3 +4,11 @@ inputs: columns: image: image + + +- kind: transformed_column + name: inputs + transformer: decode_and_normalize + inputs: + columns: + image: image diff --git a/examples/reviews/resources/models.yaml b/examples/reviews/resources/models.yaml index 2c9a1d3ba0..3974e87354 100644 --- a/examples/reviews/resources/models.yaml +++ b/examples/reviews/resources/models.yaml @@ -30,3 +30,18 @@ training: batch_size: 64 num_steps: 5000 + +- kind: model + name: sentiment_transformer + type: classification + target_column: label_indexed + feature_columns: + - embedding_input + aggregates: + - reviews_vocab + data_partition_ratio: + training: 0.8 + evaluation: 0.2 + training: + batch_size: 64 + num_steps: 5000 diff --git a/pkg/workloads/tf_train/train_util.py b/pkg/workloads/tf_train/train_util.py index a45031afb0..c1051f46ee 100644 --- a/pkg/workloads/tf_train/train_util.py +++ b/pkg/workloads/tf_train/train_util.py @@ -33,6 +33,14 @@ def get_input_placeholder(model_name, ctx, training=True): return input_placeholder +def get_label_placeholder(model_name, ctx): + model = ctx.models[model_name] + + target_column_name = model["target_column"] + column_type = tf_lib.CORTEX_TYPE_TO_TF_TYPE[ctx.columns[target_column_name]["type"]] + return tf.placeholder(shape=[None], dtype=column_type) + + def generate_example_parsing_fn(model_name, ctx, training=True): model = ctx.models[model_name] @@ -47,7 +55,7 @@ def _parse_example(example_proto): # Mode must be "training" or "evaluation" -def generate_input_fn(model_name, ctx, mode): +def generate_input_fn(model_name, ctx, mode, model_impl): model = ctx.models[model_name] filenames = ctx.get_training_data_parts(model_name, mode) @@ -66,6 +74,9 @@ def _input_fn(): if model[mode]["shuffle"]: dataset = dataset.shuffle(buffer_size) + if hasattr(model_impl, "transform_tensors"): + dataset = dataset.map(model_impl.transform_tensors) + dataset = dataset.batch(model[mode]["batch_size"]) dataset = dataset.prefetch(buffer_size) dataset = dataset.repeat() @@ -77,9 +88,13 @@ def _input_fn(): return _input_fn -def generate_json_serving_input_fn(model_name, ctx): +def generate_json_serving_input_fn(model_name, ctx, model_impl): def _json_serving_input_fn(): inputs = get_input_placeholder(model_name, ctx, training=False) + label = get_label_placeholder(model_name, ctx) + if hasattr(model_impl, "transform_tensors"): + inputs, _ = model_impl.transform_tensors(inputs, label) + features = {key: tf.expand_dims(tensor, -1) for key, tensor in inputs.items()} return tf.estimator.export.ServingInputReceiver(features=features, receiver_tensors=inputs) @@ -130,9 +145,9 @@ def train(model_name, model_impl, ctx, model_dir): model_dir=model_dir, ) - train_input_fn = generate_input_fn(model_name, ctx, "training") - eval_input_fn = generate_input_fn(model_name, ctx, "evaluation") - serving_input_fn = generate_json_serving_input_fn(model_name, ctx) + train_input_fn = generate_input_fn(model_name, ctx, "training", model_impl) + eval_input_fn = generate_input_fn(model_name, ctx, "evaluation", model_impl) + serving_input_fn = generate_json_serving_input_fn(model_name, ctx, model_impl) exporter = tf.estimator.FinalExporter("estimator", serving_input_fn, as_text=False) dataset_metadata = aws.read_json_from_s3(model["dataset"]["metadata_key"], ctx.bucket) From 9917cd749f015abf351030571d763af6f0476c20 Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Fri, 1 Mar 2019 09:50:39 -0500 Subject: [PATCH 02/25] add newline --- examples/mnist/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/mnist/requirements.txt b/examples/mnist/requirements.txt index 7aa3b0e700..cfbf63288b 100644 --- a/examples/mnist/requirements.txt +++ b/examples/mnist/requirements.txt @@ -1,2 +1,2 @@ pillow==5.4.1 -tensor2tensor==1.10.0 \ No newline at end of file +tensor2tensor==1.10.0 From 8edcb58c9868f1aadd508bfc629939a3e95e4c4c Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Wed, 6 Mar 2019 12:13:03 -0500 Subject: [PATCH 03/25] fix prediction time shaping --- .../mnist/implementations/models/basic.py | 11 ++++++--- examples/mnist/implementations/models/t2t.py | 23 ++++++++++++++---- examples/mnist/resources/apis.yaml | 6 +++++ examples/mnist/resources/models.yaml | 4 +++- .../mnist/resources/transformed_columns.yaml | 2 +- pkg/workloads/tf_api/api.py | 14 +++++++---- pkg/workloads/tf_train/train_util.py | 24 +++++++++++++++---- 7 files changed, 65 insertions(+), 19 deletions(-) diff --git a/examples/mnist/implementations/models/basic.py b/examples/mnist/implementations/models/basic.py index 0fe17eaed6..81e8c08811 100644 --- a/examples/mnist/implementations/models/basic.py +++ b/examples/mnist/implementations/models/basic.py @@ -5,9 +5,7 @@ def create_estimator(run_config, model_config): hparams = model_config["hparams"] def model_fn(features, labels, mode, params): - images = features["image_pixels"] - images = tf.reshape(images, [-1] + hparams["input_shape"]) - x = images + x = features["image_pixels"] for i, feature_count in enumerate(hparams["hidden_units"]): with tf.variable_scope("layer_%d" % i): @@ -55,3 +53,10 @@ def model_fn(features, labels, mode, params): estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config) return estimator + + +def transform_tensors(features, labels, model_config, training): + hparams = model_config["hparams"] + + features["image_pixels"] = tf.reshape(features["image_pixels"], hparams["input_shape"]) + return features, labels diff --git a/examples/mnist/implementations/models/t2t.py b/examples/mnist/implementations/models/t2t.py index 4c5f8f87f8..f4cb98f093 100644 --- a/examples/mnist/implementations/models/t2t.py +++ b/examples/mnist/implementations/models/t2t.py @@ -7,21 +7,34 @@ def create_estimator(run_config, model_config): - hparams = trainer_lib.create_hparams("basic_fc_small") + # t2t expects these keys in run_config run_config.data_parallelism = None run_config.t2t_device_info = {"num_async_replicas": 1} + + hparams = trainer_lib.create_hparams("basic_fc_small") problem = registry.problem("image_mnist") - problem.eval_metrics = lambda: [] p_hparams = problem.get_hparams(hparams) hparams.problem = problem hparams.problem_hparams = p_hparams + + # don't need eval_metrics + problem.eval_metrics = lambda: [] + + # t2t expects this key hparams.warm_start_from = None estimator = trainer_lib.create_estimator("basic_fc_relu", hparams, run_config) return estimator -def transform_tensors(features, labels=None): - features["inputs"] = tf.reshape(features["inputs"], [28, 28, 1]) - features["targets"] = tf.expand_dims(labels, -1) +def transform_tensors(features, labels, model_config, training): + hparams = model_config["hparams"] + + # t2t model performs flattening and expects this input key + features["inputs"] = tf.reshape(features["inputs"], hparams["input_shape"]) + + if training: + # t2t expects this key and dimension + features["targets"] = tf.expand_dims(labels, 0) + return features, labels diff --git a/examples/mnist/resources/apis.yaml b/examples/mnist/resources/apis.yaml index b66fb6b8fa..c90e80fc8d 100644 --- a/examples/mnist/resources/apis.yaml +++ b/examples/mnist/resources/apis.yaml @@ -9,3 +9,9 @@ model_name: conv compute: replicas: 1 + +- kind: api + name: t2t-classifier + model_name: t2t + compute: + replicas: 1 diff --git a/examples/mnist/resources/models.yaml b/examples/mnist/resources/models.yaml index b7b4f255d4..e01ee609b7 100644 --- a/examples/mnist/resources/models.yaml +++ b/examples/mnist/resources/models.yaml @@ -47,7 +47,9 @@ target_column: label feature_columns: - inputs - + prediction_key: outputs + hparams: + input_shape: [28, 28, 1] data_partition_ratio: training: 0.7 evaluation: 0.3 diff --git a/examples/mnist/resources/transformed_columns.yaml b/examples/mnist/resources/transformed_columns.yaml index b2b707c5a0..642d2ba0c6 100644 --- a/examples/mnist/resources/transformed_columns.yaml +++ b/examples/mnist/resources/transformed_columns.yaml @@ -5,7 +5,7 @@ columns: image: image - +# t2t needs this specific key - kind: transformed_column name: inputs transformer: decode_and_normalize diff --git a/pkg/workloads/tf_api/api.py b/pkg/workloads/tf_api/api.py index 910f660dd5..8e3bccde1b 100644 --- a/pkg/workloads/tf_api/api.py +++ b/pkg/workloads/tf_api/api.py @@ -31,6 +31,7 @@ from lib.exceptions import CortexException, UserRuntimeException, UserException from google.protobuf import json_format import time +import numpy as np logger = get_logger() logger.propagate = False # prevent double logging (flask modifies root logger) @@ -89,18 +90,21 @@ def transform_sample(sample): def create_prediction_request(transformed_sample): ctx = local_cache["ctx"] - + signatureDef = local_cache["metadata"]["signatureDef"] + signature_key = list(signatureDef.keys())[0] prediction_request = predict_pb2.PredictRequest() prediction_request.model_spec.name = "default" - prediction_request.model_spec.signature_name = list( - local_cache["metadata"]["signatureDef"].keys() - )[0] + prediction_request.model_spec.signature_name = signature_key for column_name, value in transformed_sample.items(): data_type = tf_lib.CORTEX_TYPE_TO_TF_TYPE[ctx.columns[column_name]["type"]] shape = [1] if util.is_list(value): - shape = [len(value)] + shape = [] + for dim in signatureDef[signature_key]["inputs"][column_name]["tensorShape"]["dim"]: + shape.append(int(dim["size"])) + value = np.asarray(value).reshape(shape).tolist() + tensor_proto = tf.make_tensor_proto([value], dtype=data_type, shape=shape) prediction_request.inputs[column_name].CopyFrom(tensor_proto) diff --git a/pkg/workloads/tf_train/train_util.py b/pkg/workloads/tf_train/train_util.py index c1051f46ee..ca6b16ac98 100644 --- a/pkg/workloads/tf_train/train_util.py +++ b/pkg/workloads/tf_train/train_util.py @@ -41,6 +41,16 @@ def get_label_placeholder(model_name, ctx): return tf.placeholder(shape=[None], dtype=column_type) +def get_transform_tensor_fn(ctx, model_impl, model_name, training): + model = ctx.models[model_name] + model_config = ctx.model_config(model["name"]) + + def transform_tensor_fn_wrapper(inputs, labels): + return model_impl.transform_tensors(inputs, labels, model_config, training) + + return transform_tensor_fn_wrapper + + def generate_example_parsing_fn(model_name, ctx, training=True): model = ctx.models[model_name] @@ -75,7 +85,9 @@ def _input_fn(): dataset = dataset.shuffle(buffer_size) if hasattr(model_impl, "transform_tensors"): - dataset = dataset.map(model_impl.transform_tensors) + dataset = dataset.map( + get_transform_tensor_fn(ctx, model_impl, model_name, training=True) + ) dataset = dataset.batch(model[mode]["batch_size"]) dataset = dataset.prefetch(buffer_size) @@ -91,11 +103,15 @@ def _input_fn(): def generate_json_serving_input_fn(model_name, ctx, model_impl): def _json_serving_input_fn(): inputs = get_input_placeholder(model_name, ctx, training=False) - label = get_label_placeholder(model_name, ctx) + labels = get_label_placeholder(model_name, ctx) + + features = inputs if hasattr(model_impl, "transform_tensors"): - inputs, _ = model_impl.transform_tensors(inputs, label) + features, _ = get_transform_tensor_fn(ctx, model_impl, model_name, training=False)( + inputs, labels + ) - features = {key: tf.expand_dims(tensor, -1) for key, tensor in inputs.items()} + features = {key: tf.expand_dims(tensor, 0) for key, tensor in features.items()} return tf.estimator.export.ServingInputReceiver(features=features, receiver_tensors=inputs) return _json_serving_input_fn From a6e0f47d82f2d16fd1a0892f2076907de01cc0ea Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Wed, 6 Mar 2019 12:59:12 -0500 Subject: [PATCH 04/25] clean reviews example --- examples/reviews/resources/models.yaml | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/examples/reviews/resources/models.yaml b/examples/reviews/resources/models.yaml index 3974e87354..2c9a1d3ba0 100644 --- a/examples/reviews/resources/models.yaml +++ b/examples/reviews/resources/models.yaml @@ -30,18 +30,3 @@ training: batch_size: 64 num_steps: 5000 - -- kind: model - name: sentiment_transformer - type: classification - target_column: label_indexed - feature_columns: - - embedding_input - aggregates: - - reviews_vocab - data_partition_ratio: - training: 0.8 - evaluation: 0.2 - training: - batch_size: 64 - num_steps: 5000 From df24557b3cb4aa48e32cef521faae49492511a1c Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Wed, 6 Mar 2019 21:28:19 -0500 Subject: [PATCH 05/25] if undefined shape, take the length --- pkg/workloads/tf_api/api.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pkg/workloads/tf_api/api.py b/pkg/workloads/tf_api/api.py index 8e3bccde1b..b296555a0b 100644 --- a/pkg/workloads/tf_api/api.py +++ b/pkg/workloads/tf_api/api.py @@ -102,9 +102,14 @@ def create_prediction_request(transformed_sample): if util.is_list(value): shape = [] for dim in signatureDef[signature_key]["inputs"][column_name]["tensorShape"]["dim"]: - shape.append(int(dim["size"])) - value = np.asarray(value).reshape(shape).tolist() + dim = int(dim["size"]) + if dim == -1: + dim = len(value) + shape.append(dim) + value = np.asarray(value).reshape(shape).tolist() + util.log_pretty(value) + util.log_pretty(shape) tensor_proto = tf.make_tensor_proto([value], dtype=data_type, shape=shape) prediction_request.inputs[column_name].CopyFrom(tensor_proto) From b5c60a330548cc96b7962a76e1fe1e5a5d9f8813 Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Thu, 7 Mar 2019 09:26:11 -0500 Subject: [PATCH 06/25] add numpy to api image --- pkg/workloads/tf_api/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/workloads/tf_api/requirements.txt b/pkg/workloads/tf_api/requirements.txt index ce324dcf49..6a070802fe 100644 --- a/pkg/workloads/tf_api/requirements.txt +++ b/pkg/workloads/tf_api/requirements.txt @@ -4,3 +4,4 @@ flask==1.0.2 flask-api==1.1 waitress==1.2.1 tensorflow-serving-api==1.12.0 +numpy==1.15.4 From 7145df89c0305fc3fe6375a976254608e71d27df Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Thu, 7 Mar 2019 11:01:48 -0500 Subject: [PATCH 07/25] remove numpy dep, dont restrict unspecified python pkgs --- pkg/workloads/lib/package.py | 2 ++ pkg/workloads/lib/requirements.txt | 2 +- pkg/workloads/tf_api/requirements.txt | 1 - 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/workloads/lib/package.py b/pkg/workloads/lib/package.py index 6fede63d66..f8c4636ad7 100644 --- a/pkg/workloads/lib/package.py +++ b/pkg/workloads/lib/package.py @@ -44,6 +44,8 @@ def get_restricted_packages(): for req_file in req_files: with open(req_file) as f: for req in requirements.parse(f): + if len(req.specs) == 0: + continue cortex_packages[req.name] = req.specs[0][1] return cortex_packages diff --git a/pkg/workloads/lib/requirements.txt b/pkg/workloads/lib/requirements.txt index abdabe7e0e..4f7561f73c 100644 --- a/pkg/workloads/lib/requirements.txt +++ b/pkg/workloads/lib/requirements.txt @@ -2,5 +2,5 @@ boto3==1.9.78 msgpack==0.6.1 -numpy==1.15.4 requirements-parser==0.2.0 +numpy diff --git a/pkg/workloads/tf_api/requirements.txt b/pkg/workloads/tf_api/requirements.txt index 6a070802fe..ce324dcf49 100644 --- a/pkg/workloads/tf_api/requirements.txt +++ b/pkg/workloads/tf_api/requirements.txt @@ -4,4 +4,3 @@ flask==1.0.2 flask-api==1.1 waitress==1.2.1 tensorflow-serving-api==1.12.0 -numpy==1.15.4 From e4a01c9c5d09381be03a55ea31b4d6cb3b32709d Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Thu, 7 Mar 2019 11:39:55 -0500 Subject: [PATCH 08/25] add TODO comment to address later --- .../mnist/implementations/transformers/decode_and_normalize.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/mnist/implementations/transformers/decode_and_normalize.py b/examples/mnist/implementations/transformers/decode_and_normalize.py index 2ff41440e3..b239ec3447 100644 --- a/examples/mnist/implementations/transformers/decode_and_normalize.py +++ b/examples/mnist/implementations/transformers/decode_and_normalize.py @@ -15,4 +15,6 @@ def transform_python(sample, args): # https://www.tensorflow.org/api_docs/python/tf/image/per_image_standardization adjusted_stddev = max(np.std(decoded_image), 1.0 / math.sqrt(decoded_image.size)) standardized_image = (decoded_image - np.mean(decoded_image)) / adjusted_stddev + + # TODO remove flatten() once we support spark tensors return standardized_image.flatten().tolist() From f8c128df4e394cd2c98886acce3ed1be5a1cfcdb Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Thu, 7 Mar 2019 22:03:47 -0500 Subject: [PATCH 09/25] clean up --- examples/mnist/implementations/models/basic.py | 3 ++- pkg/workloads/tf_api/api.py | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/mnist/implementations/models/basic.py b/examples/mnist/implementations/models/basic.py index 81e8c08811..6c058252c1 100644 --- a/examples/mnist/implementations/models/basic.py +++ b/examples/mnist/implementations/models/basic.py @@ -5,8 +5,9 @@ def create_estimator(run_config, model_config): hparams = model_config["hparams"] def model_fn(features, labels, mode, params): - x = features["image_pixels"] + images = features["image_pixels"] + x = images for i, feature_count in enumerate(hparams["hidden_units"]): with tf.variable_scope("layer_%d" % i): if hparams["layer_type"] == "conv": diff --git a/pkg/workloads/tf_api/api.py b/pkg/workloads/tf_api/api.py index b296555a0b..75a232d98b 100644 --- a/pkg/workloads/tf_api/api.py +++ b/pkg/workloads/tf_api/api.py @@ -108,8 +108,6 @@ def create_prediction_request(transformed_sample): shape.append(dim) value = np.asarray(value).reshape(shape).tolist() - util.log_pretty(value) - util.log_pretty(shape) tensor_proto = tf.make_tensor_proto([value], dtype=data_type, shape=shape) prediction_request.inputs[column_name].CopyFrom(tensor_proto) From 17144a29b426c734c0e5c8a653d424709e626912 Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Fri, 8 Mar 2019 12:40:12 -0500 Subject: [PATCH 10/25] clean up example and transform tensor api --- .../mnist/implementations/models/basic.py | 2 +- examples/mnist/implementations/models/t2t.py | 9 +++--- examples/mnist/resources/models.yaml | 2 +- .../mnist/resources/transformed_columns.yaml | 8 ----- pkg/workloads/tf_api/api.py | 2 ++ pkg/workloads/tf_train/train_util.py | 29 ++++--------------- 6 files changed, 14 insertions(+), 38 deletions(-) diff --git a/examples/mnist/implementations/models/basic.py b/examples/mnist/implementations/models/basic.py index 6c058252c1..5871d01118 100644 --- a/examples/mnist/implementations/models/basic.py +++ b/examples/mnist/implementations/models/basic.py @@ -56,7 +56,7 @@ def model_fn(features, labels, mode, params): return estimator -def transform_tensors(features, labels, model_config, training): +def transform_tensors(features, labels, model_config): hparams = model_config["hparams"] features["image_pixels"] = tf.reshape(features["image_pixels"], hparams["input_shape"]) diff --git a/examples/mnist/implementations/models/t2t.py b/examples/mnist/implementations/models/t2t.py index f4cb98f093..a0b11fc42e 100644 --- a/examples/mnist/implementations/models/t2t.py +++ b/examples/mnist/implementations/models/t2t.py @@ -27,14 +27,13 @@ def create_estimator(run_config, model_config): return estimator -def transform_tensors(features, labels, model_config, training): +def transform_tensors(features, labels, model_config): hparams = model_config["hparams"] # t2t model performs flattening and expects this input key - features["inputs"] = tf.reshape(features["inputs"], hparams["input_shape"]) + features["inputs"] = tf.reshape(features["image_pixels"], hparams["input_shape"]) - if training: - # t2t expects this key and dimension - features["targets"] = tf.expand_dims(labels, 0) + # t2t expects this key and dimension + features["targets"] = tf.expand_dims(labels, 0) return features, labels diff --git a/examples/mnist/resources/models.yaml b/examples/mnist/resources/models.yaml index e01ee609b7..990865d669 100644 --- a/examples/mnist/resources/models.yaml +++ b/examples/mnist/resources/models.yaml @@ -46,7 +46,7 @@ type: classification target_column: label feature_columns: - - inputs + - image_pixels prediction_key: outputs hparams: input_shape: [28, 28, 1] diff --git a/examples/mnist/resources/transformed_columns.yaml b/examples/mnist/resources/transformed_columns.yaml index 642d2ba0c6..f46f08aba4 100644 --- a/examples/mnist/resources/transformed_columns.yaml +++ b/examples/mnist/resources/transformed_columns.yaml @@ -4,11 +4,3 @@ inputs: columns: image: image - -# t2t needs this specific key -- kind: transformed_column - name: inputs - transformer: decode_and_normalize - inputs: - columns: - image: image diff --git a/pkg/workloads/tf_api/api.py b/pkg/workloads/tf_api/api.py index 75a232d98b..3fb3c57481 100644 --- a/pkg/workloads/tf_api/api.py +++ b/pkg/workloads/tf_api/api.py @@ -91,12 +91,14 @@ def transform_sample(sample): def create_prediction_request(transformed_sample): ctx = local_cache["ctx"] signatureDef = local_cache["metadata"]["signatureDef"] + util.log_pretty(signatureDef, indent=4) signature_key = list(signatureDef.keys())[0] prediction_request = predict_pb2.PredictRequest() prediction_request.model_spec.name = "default" prediction_request.model_spec.signature_name = signature_key for column_name, value in transformed_sample.items(): + util.log_pretty(column_name, indent=4) data_type = tf_lib.CORTEX_TYPE_TO_TF_TYPE[ctx.columns[column_name]["type"]] shape = [1] if util.is_list(value): diff --git a/pkg/workloads/tf_train/train_util.py b/pkg/workloads/tf_train/train_util.py index ca6b16ac98..3c7697f023 100644 --- a/pkg/workloads/tf_train/train_util.py +++ b/pkg/workloads/tf_train/train_util.py @@ -41,12 +41,12 @@ def get_label_placeholder(model_name, ctx): return tf.placeholder(shape=[None], dtype=column_type) -def get_transform_tensor_fn(ctx, model_impl, model_name, training): +def get_transform_tensor_fn(ctx, model_impl, model_name): model = ctx.models[model_name] model_config = ctx.model_config(model["name"]) def transform_tensor_fn_wrapper(inputs, labels): - return model_impl.transform_tensors(inputs, labels, model_config, training) + return model_impl.transform_tensors(inputs, labels, model_config) return transform_tensor_fn_wrapper @@ -85,9 +85,7 @@ def _input_fn(): dataset = dataset.shuffle(buffer_size) if hasattr(model_impl, "transform_tensors"): - dataset = dataset.map( - get_transform_tensor_fn(ctx, model_impl, model_name, training=True) - ) + dataset = dataset.map(get_transform_tensor_fn(ctx, model_impl, model_name)) dataset = dataset.batch(model[mode]["batch_size"]) dataset = dataset.prefetch(buffer_size) @@ -105,11 +103,10 @@ def _json_serving_input_fn(): inputs = get_input_placeholder(model_name, ctx, training=False) labels = get_label_placeholder(model_name, ctx) - features = inputs + # copy inputs + features = {key: tensor for key, tensor in inputs.items()} if hasattr(model_impl, "transform_tensors"): - features, _ = get_transform_tensor_fn(ctx, model_impl, model_name, training=False)( - inputs, labels - ) + features, _ = get_transform_tensor_fn(ctx, model_impl, model_name)(features, labels) features = {key: tf.expand_dims(tensor, 0) for key, tensor in features.items()} return tf.estimator.export.ServingInputReceiver(features=features, receiver_tensors=inputs) @@ -117,20 +114,6 @@ def _json_serving_input_fn(): return _json_serving_input_fn -def generate_example_serving_input_fn(model_name, ctx): - def _example_serving_input_fn(): - feature_spec = tf_lib.get_feature_spec(model_name, ctx, training=False) - example_bytestring = tf.placeholder(shape=[None], dtype=tf.string) - feature_scalars = tf.parse_single_example(example_bytestring, feature_spec) - features = {key: tf.expand_dims(tensor, -1) for key, tensor in feature_scalars.items()} - - return tf.estimator.export.ServingInputReceiver( - features=features, receiver_tensors={"example_proto": example_bytestring} - ) - - return _example_serving_input_fn - - def get_regression_eval_metrics(labels, predictions): metrics = {} prediction_values = predictions["predictions"] From e73d95c86c8ea8360e0fc29091ef12fffd98e1db Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Fri, 8 Mar 2019 12:41:32 -0500 Subject: [PATCH 11/25] transform_tensors -> transform_tensorflow --- examples/mnist/implementations/models/basic.py | 2 +- examples/mnist/implementations/models/t2t.py | 2 +- pkg/workloads/tf_train/train_util.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/mnist/implementations/models/basic.py b/examples/mnist/implementations/models/basic.py index 5871d01118..fff266ee07 100644 --- a/examples/mnist/implementations/models/basic.py +++ b/examples/mnist/implementations/models/basic.py @@ -56,7 +56,7 @@ def model_fn(features, labels, mode, params): return estimator -def transform_tensors(features, labels, model_config): +def transform_tensorflow(features, labels, model_config): hparams = model_config["hparams"] features["image_pixels"] = tf.reshape(features["image_pixels"], hparams["input_shape"]) diff --git a/examples/mnist/implementations/models/t2t.py b/examples/mnist/implementations/models/t2t.py index a0b11fc42e..db7595465a 100644 --- a/examples/mnist/implementations/models/t2t.py +++ b/examples/mnist/implementations/models/t2t.py @@ -27,7 +27,7 @@ def create_estimator(run_config, model_config): return estimator -def transform_tensors(features, labels, model_config): +def transform_tensorflow(features, labels, model_config): hparams = model_config["hparams"] # t2t model performs flattening and expects this input key diff --git a/pkg/workloads/tf_train/train_util.py b/pkg/workloads/tf_train/train_util.py index 3c7697f023..eeb9cce0d2 100644 --- a/pkg/workloads/tf_train/train_util.py +++ b/pkg/workloads/tf_train/train_util.py @@ -46,7 +46,7 @@ def get_transform_tensor_fn(ctx, model_impl, model_name): model_config = ctx.model_config(model["name"]) def transform_tensor_fn_wrapper(inputs, labels): - return model_impl.transform_tensors(inputs, labels, model_config) + return model_impl.transform_tensorflow(inputs, labels, model_config) return transform_tensor_fn_wrapper @@ -84,7 +84,7 @@ def _input_fn(): if model[mode]["shuffle"]: dataset = dataset.shuffle(buffer_size) - if hasattr(model_impl, "transform_tensors"): + if hasattr(model_impl, "transform_tensorflow"): dataset = dataset.map(get_transform_tensor_fn(ctx, model_impl, model_name)) dataset = dataset.batch(model[mode]["batch_size"]) @@ -105,7 +105,7 @@ def _json_serving_input_fn(): # copy inputs features = {key: tensor for key, tensor in inputs.items()} - if hasattr(model_impl, "transform_tensors"): + if hasattr(model_impl, "transform_tensorflow"): features, _ = get_transform_tensor_fn(ctx, model_impl, model_name)(features, labels) features = {key: tf.expand_dims(tensor, 0) for key, tensor in features.items()} From 99e0b2a3c7fb670dc8b0b7145701a9c42a90f468 Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Fri, 8 Mar 2019 13:36:40 -0500 Subject: [PATCH 12/25] add back dnn --- examples/mnist/resources/models.yaml | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/examples/mnist/resources/models.yaml b/examples/mnist/resources/models.yaml index 990865d669..7ce39f472b 100644 --- a/examples/mnist/resources/models.yaml +++ b/examples/mnist/resources/models.yaml @@ -1,22 +1,17 @@ - kind: model - name: dense - path: implementations/models/basic.py + name: dnn + path: implementations/models/dnn.py type: classification target_column: label feature_columns: - image_pixels hparams: - layer_type: basic learning_rate: 0.01 - input_shape: [784] output_shape: [10] - hidden_units: [100, 200, 10] + hidden_units: [100, 200] data_partition_ratio: training: 0.7 evaluation: 0.3 - training: - batch_size: 64 - num_epochs: 5 - kind: model name: conv From ad0be817ba68f6db157e8a913f8df38e17184453 Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Fri, 8 Mar 2019 13:37:01 -0500 Subject: [PATCH 13/25] add back dnn --- examples/mnist/resources/apis.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/mnist/resources/apis.yaml b/examples/mnist/resources/apis.yaml index c90e80fc8d..3bab92bd2a 100644 --- a/examples/mnist/resources/apis.yaml +++ b/examples/mnist/resources/apis.yaml @@ -1,6 +1,6 @@ - kind: api - name: dense-classifier - model_name: dense + name: dnn-classifier + model_name: dnn compute: replicas: 1 From e9e7c92c09fc275bb1d4ae4ae3f2cfc26c062afd Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Fri, 8 Mar 2019 14:04:43 -0500 Subject: [PATCH 14/25] fix example --- examples/mnist/resources/models.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/mnist/resources/models.yaml b/examples/mnist/resources/models.yaml index 7ce39f472b..75cca4bb56 100644 --- a/examples/mnist/resources/models.yaml +++ b/examples/mnist/resources/models.yaml @@ -7,6 +7,7 @@ - image_pixels hparams: learning_rate: 0.01 + input_shape: [784] output_shape: [10] hidden_units: [100, 200] data_partition_ratio: @@ -26,7 +27,7 @@ input_shape: [28, 28, 1] output_shape: [10] kernel_size: 2 - hidden_units: [10, 10, 10] + hidden_units: [100, 50, 100] data_partition_ratio: training: 0.7 evaluation: 0.3 From 9be820a9215296423c3844082420f2972b3a88fa Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Fri, 8 Mar 2019 15:21:03 -0500 Subject: [PATCH 15/25] remove TODO --- .../mnist/implementations/transformers/decode_and_normalize.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/mnist/implementations/transformers/decode_and_normalize.py b/examples/mnist/implementations/transformers/decode_and_normalize.py index b239ec3447..dcf801c2f2 100644 --- a/examples/mnist/implementations/transformers/decode_and_normalize.py +++ b/examples/mnist/implementations/transformers/decode_and_normalize.py @@ -16,5 +16,4 @@ def transform_python(sample, args): adjusted_stddev = max(np.std(decoded_image), 1.0 / math.sqrt(decoded_image.size)) standardized_image = (decoded_image - np.mean(decoded_image)) / adjusted_stddev - # TODO remove flatten() once we support spark tensors return standardized_image.flatten().tolist() From 5b5263bb6e8c4ce22460fe6b826375e743f3b4c2 Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Mon, 11 Mar 2019 12:49:11 -0400 Subject: [PATCH 16/25] add docs --- docs/applications/implementations/models.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/applications/implementations/models.md b/docs/applications/implementations/models.md index c81a97d9fb..7beeaa8574 100644 --- a/docs/applications/implementations/models.md +++ b/docs/applications/implementations/models.md @@ -49,3 +49,20 @@ def create_estimator(run_config, model_config): ## Customization You can import PyPI packages or your own Python packages to help create more complex models. See [Python Packages](../advanced/python-packages.md) for more details. + + +# Tensorflow Transformations +You can preprocess input features and labels to your model by defining a `transform_tensorflow` function. This is useful in cases where you don't have access to the model implementation which expects a multi-dimensional tensor, for example. + +```python +def transform_tensorflow(features, labels, model_config): + hparams = model_config["hparams"] + + # t2t model performs flattening and expects this input key + features["inputs"] = tf.reshape(features["image_pixels"], hparams["input_shape"]) + + # t2t expects this key and dimension + features["targets"] = tf.expand_dims(labels, 0) + + return features, labels +``` From 71ea67fa1c94965ee5f80a77f05c2bc68818fad5 Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Wed, 13 Mar 2019 11:40:34 -0400 Subject: [PATCH 17/25] address comments --- docs/applications/implementations/models.md | 25 ++++++++++++++++--- .../models/{basic.py => custom.py} | 4 +-- examples/mnist/implementations/models/t2t.py | 3 ++- examples/mnist/resources/models.yaml | 2 +- pkg/workloads/lib/package.py | 2 -- pkg/workloads/lib/requirements.txt | 1 - pkg/workloads/tf_api/api.py | 18 ++++++------- pkg/workloads/tf_train/train_util.py | 1 - 8 files changed, 34 insertions(+), 22 deletions(-) rename examples/mnist/implementations/models/{basic.py => custom.py} (97%) diff --git a/docs/applications/implementations/models.md b/docs/applications/implementations/models.md index 7beeaa8574..f634aaaef9 100644 --- a/docs/applications/implementations/models.md +++ b/docs/applications/implementations/models.md @@ -52,16 +52,35 @@ You can import PyPI packages or your own Python packages to help create more com # Tensorflow Transformations -You can preprocess input features and labels to your model by defining a `transform_tensorflow` function. This is useful in cases where you don't have access to the model implementation which expects a multi-dimensional tensor, for example. +You can preprocess input features and labels to your model by defining a `transform_tensorflow` function. An example of when this might be useful is to reshape a tensor to feed into a pre-made model. ```python def transform_tensorflow(features, labels, model_config): + """Create a mapping function for the tf.Dataset API. You can define + tensor transformations you want to apply to the features and labels of + each training sample. + + Args: + features: A feature dictionary of column names to feature tensors. + + labels: The label tensor. + + model_config: The Cortex configuration for the model. + Note: nested resources are expanded (e.g. model_config["target_column"]) + will be the configuration for the target column, rather than the + name of the target column). + + + Returns: + features and label tensors for the current sample. + """ + hparams = model_config["hparams"] - # t2t model performs flattening and expects this input key + # tensor2tensor model performs flattening and expects this input key, features["inputs"] = tf.reshape(features["image_pixels"], hparams["input_shape"]) - # t2t expects this key and dimension + # tensor2tensor expects this key and dimensionality features["targets"] = tf.expand_dims(labels, 0) return features, labels diff --git a/examples/mnist/implementations/models/basic.py b/examples/mnist/implementations/models/custom.py similarity index 97% rename from examples/mnist/implementations/models/basic.py rename to examples/mnist/implementations/models/custom.py index fff266ee07..3583113af5 100644 --- a/examples/mnist/implementations/models/basic.py +++ b/examples/mnist/implementations/models/custom.py @@ -5,9 +5,7 @@ def create_estimator(run_config, model_config): hparams = model_config["hparams"] def model_fn(features, labels, mode, params): - images = features["image_pixels"] - - x = images + x = features["image_pixels"] for i, feature_count in enumerate(hparams["hidden_units"]): with tf.variable_scope("layer_%d" % i): if hparams["layer_type"] == "conv": diff --git a/examples/mnist/implementations/models/t2t.py b/examples/mnist/implementations/models/t2t.py index db7595465a..53ce2dfa57 100644 --- a/examples/mnist/implementations/models/t2t.py +++ b/examples/mnist/implementations/models/t2t.py @@ -11,6 +11,7 @@ def create_estimator(run_config, model_config): run_config.data_parallelism = None run_config.t2t_device_info = {"num_async_replicas": 1} + # t2t has its own set of hyperparameters we can use hparams = trainer_lib.create_hparams("basic_fc_small") problem = registry.problem("image_mnist") p_hparams = problem.get_hparams(hparams) @@ -33,7 +34,7 @@ def transform_tensorflow(features, labels, model_config): # t2t model performs flattening and expects this input key features["inputs"] = tf.reshape(features["image_pixels"], hparams["input_shape"]) - # t2t expects this key and dimension + # t2t expects this key and dimensionality features["targets"] = tf.expand_dims(labels, 0) return features, labels diff --git a/examples/mnist/resources/models.yaml b/examples/mnist/resources/models.yaml index 75cca4bb56..f6ae5fdacd 100644 --- a/examples/mnist/resources/models.yaml +++ b/examples/mnist/resources/models.yaml @@ -16,7 +16,7 @@ - kind: model name: conv - path: implementations/models/basic.py + path: implementations/models/custom.py type: classification target_column: label feature_columns: diff --git a/pkg/workloads/lib/package.py b/pkg/workloads/lib/package.py index f8c4636ad7..6fede63d66 100644 --- a/pkg/workloads/lib/package.py +++ b/pkg/workloads/lib/package.py @@ -44,8 +44,6 @@ def get_restricted_packages(): for req_file in req_files: with open(req_file) as f: for req in requirements.parse(f): - if len(req.specs) == 0: - continue cortex_packages[req.name] = req.specs[0][1] return cortex_packages diff --git a/pkg/workloads/lib/requirements.txt b/pkg/workloads/lib/requirements.txt index 4f7561f73c..fefdedaa41 100644 --- a/pkg/workloads/lib/requirements.txt +++ b/pkg/workloads/lib/requirements.txt @@ -3,4 +3,3 @@ boto3==1.9.78 msgpack==0.6.1 requirements-parser==0.2.0 -numpy diff --git a/pkg/workloads/tf_api/api.py b/pkg/workloads/tf_api/api.py index 3fb3c57481..d0ec617df4 100644 --- a/pkg/workloads/tf_api/api.py +++ b/pkg/workloads/tf_api/api.py @@ -31,7 +31,6 @@ from lib.exceptions import CortexException, UserRuntimeException, UserException from google.protobuf import json_format import time -import numpy as np logger = get_logger() logger.propagate = False # prevent double logging (flask modifies root logger) @@ -91,24 +90,23 @@ def transform_sample(sample): def create_prediction_request(transformed_sample): ctx = local_cache["ctx"] signatureDef = local_cache["metadata"]["signatureDef"] - util.log_pretty(signatureDef, indent=4) signature_key = list(signatureDef.keys())[0] prediction_request = predict_pb2.PredictRequest() prediction_request.model_spec.name = "default" prediction_request.model_spec.signature_name = signature_key for column_name, value in transformed_sample.items(): - util.log_pretty(column_name, indent=4) data_type = tf_lib.CORTEX_TYPE_TO_TF_TYPE[ctx.columns[column_name]["type"]] shape = [1] if util.is_list(value): - shape = [] - for dim in signatureDef[signature_key]["inputs"][column_name]["tensorShape"]["dim"]: - dim = int(dim["size"]) - if dim == -1: - dim = len(value) - - shape.append(dim) + shape = [len(value)] + # shape = [] + # for dim in signatureDef[signature_key]["inputs"][column_name]["tensorShape"]["dim"]: + # dim = int(dim["size"]) + # if dim == -1: + # dim = len(value) + + # shape.append(dim) value = np.asarray(value).reshape(shape).tolist() tensor_proto = tf.make_tensor_proto([value], dtype=data_type, shape=shape) prediction_request.inputs[column_name].CopyFrom(tensor_proto) diff --git a/pkg/workloads/tf_train/train_util.py b/pkg/workloads/tf_train/train_util.py index eeb9cce0d2..db141494e0 100644 --- a/pkg/workloads/tf_train/train_util.py +++ b/pkg/workloads/tf_train/train_util.py @@ -103,7 +103,6 @@ def _json_serving_input_fn(): inputs = get_input_placeholder(model_name, ctx, training=False) labels = get_label_placeholder(model_name, ctx) - # copy inputs features = {key: tensor for key, tensor in inputs.items()} if hasattr(model_impl, "transform_tensorflow"): features, _ = get_transform_tensor_fn(ctx, model_impl, model_name)(features, labels) From 9a9b6b9ff0fa29c505b23341396a7efbbded455f Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Wed, 13 Mar 2019 13:22:27 -0400 Subject: [PATCH 18/25] remove commented code --- pkg/workloads/tf_api/api.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pkg/workloads/tf_api/api.py b/pkg/workloads/tf_api/api.py index d0ec617df4..d66e1f5255 100644 --- a/pkg/workloads/tf_api/api.py +++ b/pkg/workloads/tf_api/api.py @@ -100,14 +100,7 @@ def create_prediction_request(transformed_sample): shape = [1] if util.is_list(value): shape = [len(value)] - # shape = [] - # for dim in signatureDef[signature_key]["inputs"][column_name]["tensorShape"]["dim"]: - # dim = int(dim["size"]) - # if dim == -1: - # dim = len(value) - - # shape.append(dim) - value = np.asarray(value).reshape(shape).tolist() + tensor_proto = tf.make_tensor_proto([value], dtype=data_type, shape=shape) prediction_request.inputs[column_name].CopyFrom(tensor_proto) From b8903ba45302260cdf2268ba88f3a87c33d7e5a2 Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Wed, 13 Mar 2019 13:27:38 -0400 Subject: [PATCH 19/25] clean up extra line --- pkg/workloads/tf_api/api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/workloads/tf_api/api.py b/pkg/workloads/tf_api/api.py index d66e1f5255..e52ed51e66 100644 --- a/pkg/workloads/tf_api/api.py +++ b/pkg/workloads/tf_api/api.py @@ -100,7 +100,6 @@ def create_prediction_request(transformed_sample): shape = [1] if util.is_list(value): shape = [len(value)] - tensor_proto = tf.make_tensor_proto([value], dtype=data_type, shape=shape) prediction_request.inputs[column_name].CopyFrom(tensor_proto) From b035040fdf73cde28f714a75c6731fe8f8abf59f Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Wed, 13 Mar 2019 15:07:21 -0400 Subject: [PATCH 20/25] add transform_tensorflow to model_impl check --- pkg/workloads/lib/context.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/workloads/lib/context.py b/pkg/workloads/lib/context.py index a164dc94bb..0f56ea69c9 100644 --- a/pkg/workloads/lib/context.py +++ b/pkg/workloads/lib/context.py @@ -460,7 +460,10 @@ def resource_status_key(self, resource): MODEL_IMPL_VALIDATION = { - "required": [{"name": "create_estimator", "args": ["run_config", "model_config"]}] + "required": [{"name": "create_estimator", "args": ["run_config", "model_config"]}], + "optional": [ + {"name": "transform_tensorflow", "args": ["features", "labels", "model_config"]} + ] } AGGREGATOR_IMPL_VALIDATION = { From 08c097b6bb7d6573f0cafb477d71f022b2d62274 Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Wed, 13 Mar 2019 15:24:07 -0400 Subject: [PATCH 21/25] format --- pkg/workloads/lib/context.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pkg/workloads/lib/context.py b/pkg/workloads/lib/context.py index 0f56ea69c9..3387390912 100644 --- a/pkg/workloads/lib/context.py +++ b/pkg/workloads/lib/context.py @@ -461,9 +461,7 @@ def resource_status_key(self, resource): MODEL_IMPL_VALIDATION = { "required": [{"name": "create_estimator", "args": ["run_config", "model_config"]}], - "optional": [ - {"name": "transform_tensorflow", "args": ["features", "labels", "model_config"]} - ] + "optional": [{"name": "transform_tensorflow", "args": ["features", "labels", "model_config"]}], } AGGREGATOR_IMPL_VALIDATION = { From a1a8a4df5918ce82725b3aae9bccb546fb4487c4 Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Wed, 13 Mar 2019 15:31:48 -0400 Subject: [PATCH 22/25] remove extra new line --- .../mnist/implementations/transformers/decode_and_normalize.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/mnist/implementations/transformers/decode_and_normalize.py b/examples/mnist/implementations/transformers/decode_and_normalize.py index dcf801c2f2..2ff41440e3 100644 --- a/examples/mnist/implementations/transformers/decode_and_normalize.py +++ b/examples/mnist/implementations/transformers/decode_and_normalize.py @@ -15,5 +15,4 @@ def transform_python(sample, args): # https://www.tensorflow.org/api_docs/python/tf/image/per_image_standardization adjusted_stddev = max(np.std(decoded_image), 1.0 / math.sqrt(decoded_image.size)) standardized_image = (decoded_image - np.mean(decoded_image)) / adjusted_stddev - return standardized_image.flatten().tolist() From f44d9f24fbb736afe685067b7d8a50a42025562a Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Thu, 14 Mar 2019 10:09:17 -0400 Subject: [PATCH 23/25] update docs --- docs/applications/implementations/models.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/docs/applications/implementations/models.md b/docs/applications/implementations/models.md index 56fae469ac..3b98c4e699 100644 --- a/docs/applications/implementations/models.md +++ b/docs/applications/implementations/models.md @@ -69,9 +69,9 @@ You can preprocess input features and labels to your model by defining a `transf ```python def transform_tensorflow(features, labels, model_config): - """Create a mapping function for the tf.Dataset API. You can define - tensor transformations you want to apply to the features and labels of - each training sample. + """Define tensor transformations for the feature and label tensors. You can define + tensor transformations you want to apply to the features and labels tensors before + they are passed to the model. Args: features: A feature dictionary of column names to feature tensors. @@ -85,9 +85,17 @@ def transform_tensorflow(features, labels, model_config): Returns: - features and label tensors for the current sample. + features and labels tensors. """ + pass +``` + +## Example +```python +import tensorflow as tf + +def transform_tensorflow(features, labels, model_config): hparams = model_config["hparams"] # tensor2tensor model performs flattening and expects this input key, From 0710af41fcecb295c44b514cb5fcdc22cf462690 Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Thu, 14 Mar 2019 12:32:42 -0400 Subject: [PATCH 24/25] update mnist conv model --- examples/mnist/resources/models.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/mnist/resources/models.yaml b/examples/mnist/resources/models.yaml index f6ae5fdacd..a789ab9d0c 100644 --- a/examples/mnist/resources/models.yaml +++ b/examples/mnist/resources/models.yaml @@ -26,8 +26,8 @@ learning_rate: 0.01 input_shape: [28, 28, 1] output_shape: [10] - kernel_size: 2 - hidden_units: [100, 50, 100] + kernel_size: 4 + hidden_units: [10, 10, 10] data_partition_ratio: training: 0.7 evaluation: 0.3 From 3c1d6a1031c4ebe09319ae8fdb13ca34455a8c3f Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Thu, 14 Mar 2019 14:34:19 -0400 Subject: [PATCH 25/25] address doc comments --- docs/applications/implementations/models.md | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/docs/applications/implementations/models.md b/docs/applications/implementations/models.md index 3b98c4e699..4262c0edb4 100644 --- a/docs/applications/implementations/models.md +++ b/docs/applications/implementations/models.md @@ -65,13 +65,13 @@ You can install additional PyPI packages and import your own Python packages. Se # Tensorflow Transformations -You can preprocess input features and labels to your model by defining a `transform_tensorflow` function. An example of when this might be useful is to reshape a tensor to feed into a pre-made model. +You can preprocess input features and labels to your model by defining a `transform_tensorflow` function. You can define tensor transformations you want to apply to the features and labels tensors before they are passed to the model. + +## Implementation ```python def transform_tensorflow(features, labels, model_config): - """Define tensor transformations for the feature and label tensors. You can define - tensor transformations you want to apply to the features and labels tensors before - they are passed to the model. + """Define tensor transformations for the feature and label tensors. Args: features: A feature dictionary of column names to feature tensors. @@ -87,7 +87,7 @@ def transform_tensorflow(features, labels, model_config): Returns: features and labels tensors. """ - pass + return features, labels ``` ## Example @@ -97,12 +97,6 @@ import tensorflow as tf def transform_tensorflow(features, labels, model_config): hparams = model_config["hparams"] - - # tensor2tensor model performs flattening and expects this input key, - features["inputs"] = tf.reshape(features["image_pixels"], hparams["input_shape"]) - - # tensor2tensor expects this key and dimensionality - features["targets"] = tf.expand_dims(labels, 0) - + features["image_pixels"] = tf.reshape(features["image_pixels"], hparams["input_shape"]) return features, labels ```