Tensor2Tensor Example and transform_tensorflow feature #29

Merged
merged 28 commits on Mar 14, 2019
40 changes: 40 additions & 0 deletions docs/applications/implementations/models.md
@@ -48,6 +48,8 @@ def create_estimator(run_config, model_config):

## Pre-installed Packages

You can import PyPI packages or your own Python packages to help create more complex models. See [Python Packages](../advanced/python-packages.md) for more details.

The following packages have been pre-installed and can be used in your implementations:

```text
@@ -60,3 +62,41 @@ packaging==19.0.0
```

You can install additional PyPI packages and import your own Python packages. See [Python Packages](../advanced/python-packages.md) for more details.
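For example, a model implementation can use one of these packages directly. Here is a minimal sketch (the model and version check are illustrative only) that uses the pre-installed `packaging` library:

```python
import tensorflow as tf
from packaging import version  # pre-installed

def create_estimator(run_config, model_config):
    # Illustrative guard: fail fast if the TensorFlow version is older than expected.
    if version.parse(tf.__version__) < version.parse("1.12.0"):
        raise RuntimeError("this model implementation expects TensorFlow >= 1.12")

    feature_columns = [tf.feature_column.numeric_column("image_pixels", shape=[784])]
    return tf.estimator.LinearClassifier(
        feature_columns=feature_columns, n_classes=10, config=run_config
    )
```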


# TensorFlow Transformations
You can preprocess your model's input features and labels by defining a `transform_tensorflow` function, which applies tensor transformations to the features and labels tensors before they are passed to the model.

## Implementation

```python
def transform_tensorflow(features, labels, model_config):
"""Define tensor transformations for the feature and label tensors.

Args:
features: A feature dictionary of column names to feature tensors.

labels: The label tensor.
deliahu marked this conversation as resolved.
Show resolved Hide resolved

model_config: The Cortex configuration for the model.
Note: nested resources are expanded (e.g. model_config["target_column"])
will be the configuration for the target column, rather than the
name of the target column).


Returns:
features and labels tensors.
"""
return features, labels
```

## Example

```python
import tensorflow as tf

def transform_tensorflow(features, labels, model_config):
    hparams = model_config["hparams"]
    features["image_pixels"] = tf.reshape(features["image_pixels"], hparams["input_shape"])
    return features, labels
```
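
Transforms can also modify tensor values, not just shapes. The following is a minimal sketch (assuming `image_pixels` arrives as raw 0-255 values, which is not necessarily the case for the MNIST example above) that scales pixels into the [0, 1] range before reshaping:

```python
import tensorflow as tf

def transform_tensorflow(features, labels, model_config):
    hparams = model_config["hparams"]
    # Assumed: raw pixel values in [0, 255]; cast and scale them into [0, 1].
    pixels = tf.cast(features["image_pixels"], tf.float32) / 255.0
    features["image_pixels"] = tf.reshape(pixels, hparams["input_shape"])
    return features, labels
```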
@@ -5,10 +5,7 @@ def create_estimator(run_config, model_config):
hparams = model_config["hparams"]

def model_fn(features, labels, mode, params):
images = features["image_pixels"]
images = tf.reshape(images, [-1] + hparams["input_shape"])
x = images

x = features["image_pixels"]
for i, feature_count in enumerate(hparams["hidden_units"]):
with tf.variable_scope("layer_%d" % i):
if hparams["layer_type"] == "conv":
@@ -55,3 +52,10 @@ def model_fn(features, labels, mode, params):

estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)
return estimator


def transform_tensorflow(features, labels, model_config):
    hparams = model_config["hparams"]

    features["image_pixels"] = tf.reshape(features["image_pixels"], hparams["input_shape"])
    return features, labels
40 changes: 40 additions & 0 deletions examples/mnist/implementations/models/t2t.py
@@ -0,0 +1,40 @@
import tensorflow as tf
from tensor2tensor.utils import trainer_lib
from tensor2tensor import models # pylint: disable=unused-import
from tensor2tensor import problems # pylint: disable=unused-import
from tensor2tensor.data_generators import problem_hparams
from tensor2tensor.utils import registry


def create_estimator(run_config, model_config):
    # t2t expects these keys in run_config
    run_config.data_parallelism = None
    run_config.t2t_device_info = {"num_async_replicas": 1}

    # t2t has its own set of hyperparameters we can use
    hparams = trainer_lib.create_hparams("basic_fc_small")
    problem = registry.problem("image_mnist")
    p_hparams = problem.get_hparams(hparams)
    hparams.problem = problem
    hparams.problem_hparams = p_hparams

    # don't need eval_metrics
    problem.eval_metrics = lambda: []

    # t2t expects this key
    hparams.warm_start_from = None

    estimator = trainer_lib.create_estimator("basic_fc_relu", hparams, run_config)
    return estimator


def transform_tensorflow(features, labels, model_config):
    hparams = model_config["hparams"]

    # t2t model performs flattening and expects this input key
    features["inputs"] = tf.reshape(features["image_pixels"], hparams["input_shape"])

    # t2t expects this key and dimensionality
    features["targets"] = tf.expand_dims(labels, 0)

    return features, labels
1 change: 1 addition & 0 deletions examples/mnist/requirements.txt
@@ -1 +1,2 @@
pillow==5.4.1
tensor2tensor==1.10.0
10 changes: 8 additions & 2 deletions examples/mnist/resources/apis.yaml
@@ -1,6 +1,6 @@
- kind: api
name: dense-classifier
model_name: dense
name: dnn-classifier
model_name: dnn
compute:
replicas: 1

@@ -9,3 +9,9 @@
model_name: conv
compute:
replicas: 1

- kind: api
name: t2t-classifier
model_name: t2t
compute:
replicas: 1
23 changes: 9 additions & 14 deletions examples/mnist/resources/models.yaml
@@ -1,26 +1,22 @@
- kind: model
name: dense
path: implementations/models/basic.py
name: dnn
path: implementations/models/dnn.py
type: classification
target_column: label
feature_columns:
- image_pixels
hparams:
layer_type: basic
learning_rate: 0.01
input_shape: [784]
output_shape: [10]
hidden_units: [100, 200, 10]
hidden_units: [100, 200]
data_partition_ratio:
training: 0.7
evaluation: 0.3
training:
batch_size: 64
num_epochs: 5

- kind: model
name: conv
path: implementations/models/basic.py
path: implementations/models/custom.py
type: classification
target_column: label
feature_columns:
@@ -30,7 +26,7 @@
learning_rate: 0.01
input_shape: [28, 28, 1]
output_shape: [10]
kernel_size: 2
kernel_size: 4
hidden_units: [10, 10, 10]
data_partition_ratio:
training: 0.7
@@ -39,18 +35,17 @@
batch_size: 64
num_epochs: 5


- kind: model
name: dnn
path: implementations/models/dnn.py
name: t2t
path: implementations/models/t2t.py
type: classification
target_column: label
feature_columns:
- image_pixels
prediction_key: outputs
hparams:
learning_rate: 0.01
input_shape: [28, 28, 1]
output_shape: [10]
hidden_units: [100, 200]
data_partition_ratio:
training: 0.7
evaluation: 0.3
3 changes: 2 additions & 1 deletion pkg/workloads/lib/context.py
@@ -460,7 +460,8 @@ def resource_status_key(self, resource):


MODEL_IMPL_VALIDATION = {
"required": [{"name": "create_estimator", "args": ["run_config", "model_config"]}]
"required": [{"name": "create_estimator", "args": ["run_config", "model_config"]}],
"optional": [{"name": "transform_tensorflow", "args": ["features", "labels", "model_config"]}],
}

AGGREGATOR_IMPL_VALIDATION = {
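In other words, a model implementation must define `create_estimator(run_config, model_config)` and may now optionally define `transform_tensorflow(features, labels, model_config)`. A minimal sketch of a file that satisfies this validation (the estimator itself is hypothetical):

```python
import tensorflow as tf

def create_estimator(run_config, model_config):
    # Required: return a tf.estimator.Estimator.
    feature_columns = [tf.feature_column.numeric_column("image_pixels", shape=[784])]
    return tf.estimator.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=model_config["hparams"]["hidden_units"],
        n_classes=10,
        config=run_config,
    )

def transform_tensorflow(features, labels, model_config):
    # Optional: adjust the feature and label tensors before they reach the estimator.
    features["image_pixels"] = tf.cast(features["image_pixels"], tf.float32)
    return features, labels
```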
7 changes: 3 additions & 4 deletions pkg/workloads/tf_api/api.py
@@ -89,12 +89,11 @@ def transform_sample(sample):

def create_prediction_request(transformed_sample):
ctx = local_cache["ctx"]

signatureDef = local_cache["metadata"]["signatureDef"]
signature_key = list(signatureDef.keys())[0]
prediction_request = predict_pb2.PredictRequest()
prediction_request.model_spec.name = "default"
prediction_request.model_spec.signature_name = list(
local_cache["metadata"]["signatureDef"].keys()
)[0]
prediction_request.model_spec.signature_name = signature_key

for column_name, value in transformed_sample.items():
data_type = tf_lib.CORTEX_TYPE_TO_TF_TYPE[ctx.columns[column_name]["type"]]
53 changes: 33 additions & 20 deletions pkg/workloads/tf_train/train_util.py
@@ -33,6 +33,24 @@ def get_input_placeholder(model_name, ctx, training=True):
return input_placeholder


def get_label_placeholder(model_name, ctx):
    model = ctx.models[model_name]

    target_column_name = model["target_column"]
    column_type = tf_lib.CORTEX_TYPE_TO_TF_TYPE[ctx.columns[target_column_name]["type"]]
    return tf.placeholder(shape=[None], dtype=column_type)


def get_transform_tensor_fn(ctx, model_impl, model_name):
    model = ctx.models[model_name]
    model_config = ctx.model_config(model["name"])

    def transform_tensor_fn_wrapper(inputs, labels):
        return model_impl.transform_tensorflow(inputs, labels, model_config)

    return transform_tensor_fn_wrapper


def generate_example_parsing_fn(model_name, ctx, training=True):
model = ctx.models[model_name]

@@ -47,7 +65,7 @@ def _parse_example(example_proto):


# Mode must be "training" or "evaluation"
def generate_input_fn(model_name, ctx, mode):
def generate_input_fn(model_name, ctx, mode, model_impl):
model = ctx.models[model_name]

filenames = ctx.get_training_data_parts(model_name, mode)
@@ -66,6 +84,9 @@ def _input_fn():
if model[mode]["shuffle"]:
dataset = dataset.shuffle(buffer_size)

if hasattr(model_impl, "transform_tensorflow"):
dataset = dataset.map(get_transform_tensor_fn(ctx, model_impl, model_name))

dataset = dataset.batch(model[mode]["batch_size"])
dataset = dataset.prefetch(buffer_size)
dataset = dataset.repeat()
@@ -77,27 +98,19 @@ def _input_fn():
return _input_fn


def generate_json_serving_input_fn(model_name, ctx):
def generate_json_serving_input_fn(model_name, ctx, model_impl):
def _json_serving_input_fn():
inputs = get_input_placeholder(model_name, ctx, training=False)
features = {key: tf.expand_dims(tensor, -1) for key, tensor in inputs.items()}
return tf.estimator.export.ServingInputReceiver(features=features, receiver_tensors=inputs)

return _json_serving_input_fn
labels = get_label_placeholder(model_name, ctx)

features = {key: tensor for key, tensor in inputs.items()}
if hasattr(model_impl, "transform_tensorflow"):
features, _ = get_transform_tensor_fn(ctx, model_impl, model_name)(features, labels)

def generate_example_serving_input_fn(model_name, ctx):
def _example_serving_input_fn():
feature_spec = tf_lib.get_feature_spec(model_name, ctx, training=False)
example_bytestring = tf.placeholder(shape=[None], dtype=tf.string)
feature_scalars = tf.parse_single_example(example_bytestring, feature_spec)
features = {key: tf.expand_dims(tensor, -1) for key, tensor in feature_scalars.items()}

return tf.estimator.export.ServingInputReceiver(
features=features, receiver_tensors={"example_proto": example_bytestring}
)
features = {key: tf.expand_dims(tensor, 0) for key, tensor in features.items()}
return tf.estimator.export.ServingInputReceiver(features=features, receiver_tensors=inputs)

return _example_serving_input_fn
return _json_serving_input_fn


def get_regression_eval_metrics(labels, predictions):
@@ -130,9 +143,9 @@ def train(model_name, model_impl, ctx, model_dir):
model_dir=model_dir,
)

train_input_fn = generate_input_fn(model_name, ctx, "training")
eval_input_fn = generate_input_fn(model_name, ctx, "evaluation")
serving_input_fn = generate_json_serving_input_fn(model_name, ctx)
train_input_fn = generate_input_fn(model_name, ctx, "training", model_impl)
eval_input_fn = generate_input_fn(model_name, ctx, "evaluation", model_impl)
serving_input_fn = generate_json_serving_input_fn(model_name, ctx, model_impl)
exporter = tf.estimator.FinalExporter("estimator", serving_input_fn, as_text=False)

dataset_metadata = aws.read_json_from_s3(model["dataset"]["metadata_key"], ctx.bucket)
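To see how the new `dataset.map(get_transform_tensor_fn(ctx, model_impl, model_name))` call behaves, here is a self-contained sketch with toy tensors (hypothetical shapes, outside the Cortex context): the mapped function receives each (features, labels) pair before batching and returns the transformed pair.

```python
import tensorflow as tf

# Toy stand-ins: 4 flat "images" and their labels.
pixels = tf.random_uniform([4, 784])
labels = tf.constant([3, 1, 4, 1], dtype=tf.int64)
model_config = {"hparams": {"input_shape": [28, 28, 1]}}

def transform_tensorflow(features, labels, model_config):
    features["image_pixels"] = tf.reshape(features["image_pixels"], model_config["hparams"]["input_shape"])
    return features, labels

dataset = tf.data.Dataset.from_tensor_slices(({"image_pixels": pixels}, labels))
# Equivalent to the wrapper returned by get_transform_tensor_fn:
dataset = dataset.map(lambda f, l: transform_tensorflow(f, l, model_config))
dataset = dataset.batch(2)  # each batch: image_pixels of shape (2, 28, 28, 1)
```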