diff --git a/.github/workflows/cd-docs.yml b/.github/workflows/cd-docs.yml new file mode 100644 index 0000000..05980ff --- /dev/null +++ b/.github/workflows/cd-docs.yml @@ -0,0 +1,49 @@ +name: deploy-docs +on: + workflow_dispatch: + pull_request: + push: + branches: + - 'master' +permissions: + contents: write +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: '3.12' + cache: 'pip' + cache-dependency-path: | + setup.py + + - name: Save time for cache for mkdocs + run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV + + - name: Caching + uses: actions/cache@v4 + with: + key: mkdocs-material-${{ env.cache_id }} + path: .cache + restore-keys: | + mkdocs-material- + + - name: Install Dependencies + run: pip install -r requirements-docs.txt + + - name: Deploy to GitHub Pages + if: (github.event_name != 'pull_request') + run: | + git config user.name github-actions[bot] + git config user.email 41898282+github-actions[bot]@users.noreply.github.com + mkdocs gh-deploy --force + + - name: Build docs to check for errors + if: (github.event_name == 'pull_request') + run: | + mkdocs build diff --git a/README.md b/README.md index 75114ae..46db419 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ examples of struct2tensor in action and an introduction to the main concepts. You can [run the notebook in your browser](https://colab.research.google.com/github/google/struct2tensor/blob/master/examples/prensor_playground.ipynb) through Google's colab environment, or [download the -file](examples/prensor_playground.ipynb) to run it in your own Jupyter +file](https://github.com/google/struct2tensor/tree/master/g3doc/examples/prensor_playground.ipynb) to run it in your own Jupyter environment. @@ -99,8 +99,9 @@ struct2tensor needs a couple of custom TensorFlow ops to function. If you train a model with struct2tensor and wants to serve it with TensorFlow Serving, the TensorFlow Serving binary needs to link with those custom ops. We have a pre-built docker image that contains such a binary. The `Dockerfile` is -available at `tools/tf_serving_docker/Dockerfile`. The image is available at -`gcr.io/tfx-oss-public/s2t_tf_serving`. +available at [`tools/tf_serving_docker/Dockerfile`](https://github.com/google/struct2tensor/blob/master/struct2tensor/tools/tf_serving_docker/Dockerfile). +The image is available at +[`gcr.io/tfx-oss-public/s2t_tf_serving`](gcr.io/tfx-oss-public/s2t_tf_serving). Please see the `Dockerfile` for details. But in brief, the image exposes port 8500 as the gRPC endpoint and port 8501 as the REST endpoint. You can set diff --git a/g3doc/api_docs/python/_toc.yaml b/docs/api_docs/python/_toc.yaml similarity index 100% rename from g3doc/api_docs/python/_toc.yaml rename to docs/api_docs/python/_toc.yaml diff --git a/docs/api_docs/python/expression_impl/expression_impl.md b/docs/api_docs/python/expression_impl/expression_impl.md new file mode 100644 index 0000000..5c805f6 --- /dev/null +++ b/docs/api_docs/python/expression_impl/expression_impl.md @@ -0,0 +1,3 @@ +# `expression_impl` + +::: struct2tensor.expression_impl diff --git a/docs/api_docs/python/expression_impl/index.md b/docs/api_docs/python/expression_impl/index.md new file mode 100644 index 0000000..8afc031 --- /dev/null +++ b/docs/api_docs/python/expression_impl/index.md @@ -0,0 +1,58 @@ +# Module: expression_impl + +
+[View source on GitHub](https://github.com/google/struct2tensor/tree/master/struct2tensor/expression_impl)
+ +The modules in this file should be accessed like the following: + +```python +import struct2tensor as s2t +from struct2tensor import expression_impl + +s2t.expression_impl.apply_schema +``` + +## Modules + +[`apply_schema`][struct2tensor.expression_impl.apply_schema] module: Apply a schema to an expression. + +[`broadcast`][struct2tensor.expression_impl.broadcast] module: Methods for broadcasting a path in a tree. + +[`depth_limit`][struct2tensor.expression_impl.depth_limit] module: Caps the depth of an expression. + +[`filter_expression`][struct2tensor.expression_impl.filter_expression] module: Create a new expression that is a filtered version of an original one. + +[`index`][struct2tensor.expression_impl.index] module: get_positional_index and get_index_from_end methods. + +[`map_prensor`][struct2tensor.expression_impl.map_prensor] module: Arbitrary operations from sparse and ragged tensors to a leaf field. + +[`map_prensor_to_prensor`][struct2tensor.expression_impl.map_prensor_to_prensor] module: Arbitrary operations from prensors to prensors in an expression. + +[`map_values`][struct2tensor.expression_impl.map_values] module: Maps the values of various leaves of the same child to a single result. + +[`parquet`][struct2tensor.expression_impl.parquet] module: Apache Parquet Dataset. + +[`placeholder`][struct2tensor.expression_impl.placeholder] module: Placeholder expression. + +[`project`][struct2tensor.expression_impl.project] module: project selects a subtree of an expression. + +[`promote`][struct2tensor.expression_impl.promote] module: Promote an expression to be a child of its grandparent. + +[`promote_and_broadcast`][struct2tensor.expression_impl.promote_and_broadcast] module: promote_and_broadcast a set of nodes. + +[`proto`][struct2tensor.expression_impl.proto] module: Expressions to parse a proto. + +[`reroot`][struct2tensor.expression_impl.reroot] module: Reroot to a subtree, maintaining an input proto index. + +[`size`][struct2tensor.expression_impl.size] module: Functions for creating new size or has expression. + +[`slice_expression`][struct2tensor.expression_impl.slice_expression] module: Implementation of slice. + diff --git a/docs/api_docs/python/s2t/index.md b/docs/api_docs/python/s2t/index.md new file mode 100644 index 0000000..5b1f113 --- /dev/null +++ b/docs/api_docs/python/s2t/index.md @@ -0,0 +1,67 @@ +# Module: s2t + +
+[View source on GitHub](https://github.com/google/struct2tensor/tree/master/struct2tensor)
+ +Import core names for struct2tensor. + +## Classes + +[`class ChildNodeTensor`][struct2tensor.ChildNodeTensor]: The value of an intermediate node. + +[`class Expression`][struct2tensor.Expression]: An expression represents the calculation of a prensor object. + +[`class LeafNodeTensor`][struct2tensor.LeafNodeTensor]: The value of a leaf node. + +[`class Path`][struct2tensor.Path]: A representation of a path in the expression. + +[`class Prensor`][struct2tensor.Prensor]: A expression of NodeTensor objects. + +[`class RootNodeTensor`][struct2tensor.RootNodeTensor]: The value of the root. + +## Functions + +[`calculate_prensors(...)`][struct2tensor.calculate_prensors]: Gets the prensor value of the expressions. + +[`calculate_prensors_with_graph(...)`][struct2tensor.calculate_prensors_with_graph]: Gets the prensor value of the expressions and the graph used. + +[`calculate_prensors_with_source_paths(...)`][struct2tensor.calculate_prensors_with_source_paths]: Returns a list of prensor trees, and proto summaries. + +[`create_expression_from_file_descriptor_set(...)`][struct2tensor.create_expression_from_file_descriptor_set]: Create an expression from a 1D tensor of serialized protos. + +[`create_expression_from_prensor(...)`][struct2tensor.create_expression_from_prensor]: Gets an expression representing the prensor. + +[`create_expression_from_proto(...)`][struct2tensor.create_expression_from_proto]: Create an expression from a 1D tensor of serialized protos. + +[`create_path(...)`][struct2tensor.create_path]: Create a path from an object. + +[`create_prensor_from_descendant_nodes(...)`][struct2tensor.create_prensor_from_descendant_nodes]: Create a prensor from a map of paths to NodeTensor. + +[`create_prensor_from_root_and_children(...)`][struct2tensor.create_prensor_from_root_and_children] + +[`get_default_options(...)`][struct2tensor.get_default_options]: Get the default options. + +[`get_options_with_minimal_checks(...)`][struct2tensor.get_options_with_minimal_checks]: Options for calculation with minimal runtime checks. + +[`get_ragged_tensor(...)`][struct2tensor.get_ragged_tensor]: Get a ragged tensor for a path. (deprecated) + +[`get_ragged_tensors(...)`][struct2tensor.get_ragged_tensors]: Gets ragged tensors for all the leaves of the prensor expression. (deprecated) + +[`get_sparse_tensor(...)`][struct2tensor.get_sparse_tensor]: Gets a sparse tensor for path p. (deprecated) + +[`get_sparse_tensors(...)`][struct2tensor.get_sparse_tensors]: Gets sparse tensors for all the leaves of the prensor expression. 
(deprecated) + +## Type Aliases + +[`NodeTensor`][struct2tensor.NodeTensor] + +[`Step`][struct2tensor.Step] + diff --git a/docs/api_docs/python/s2t/s2t.md b/docs/api_docs/python/s2t/s2t.md new file mode 100644 index 0000000..798b4e3 --- /dev/null +++ b/docs/api_docs/python/s2t/s2t.md @@ -0,0 +1,4 @@ +# `s2t` + +::: struct2tensor.struct2tensor + diff --git a/docs/assets/examples/prensor_playground/aqOX7nS.png b/docs/assets/examples/prensor_playground/aqOX7nS.png new file mode 100644 index 0000000..a294533 Binary files /dev/null and b/docs/assets/examples/prensor_playground/aqOX7nS.png differ diff --git a/docs/assets/favicon.png b/docs/assets/favicon.png new file mode 100644 index 0000000..00a8af6 Binary files /dev/null and b/docs/assets/favicon.png differ diff --git a/docs/examples/prensor_playground.ipynb b/docs/examples/prensor_playground.ipynb new file mode 100644 index 0000000..a9904f4 --- /dev/null +++ b/docs/examples/prensor_playground.ipynb @@ -0,0 +1,2207 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "PHuwKiTF02Zq" + }, + "source": [ + "# Your structured data into Tensorflow.\n", + "\n", + "ML training often expects _flat_ data, like a line in a CSV.\n", + "[tf.Example](https://www.tensorflow.org/api_docs/python/tf/train/Example) was\n", + "designed to represent flat data. But the data you care about and want to predict\n", + "things about usually starts out _structured_.\n", + "\n", + "Over and over again you have to write transform code that turns your structured data into Tensors. This repetitive transform code must be rewritten over and over for all your ML pipelines both for training _and_ serving! And it lets bugs slip into your ML pipeline. \n", + "\n", + "`struct2tensor` lets you take advantage of structured data _within_ your ML pipelines. It is:\n", + "\n", + "* **for**: ML Engineers \n", + "* **who**: train models on data that starts out structured\n", + "* **it is**: a python library \n", + "* **that**: transforms your structured data into model-friendly (Sparse, Raggged, Dense, ...) tensors hermetically _within_ your model\n", + "* **unlike**: writing custom transforms over and over for training and serving.\n", + "---\n", + "![struct2tensor diagram showing the transform happens in the model](../../assets/examples/prensor_playground/aqOX7nS.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7ozCmoF33ogj" + }, + "source": [ + "# Demo example\n", + "\n", + "Suppose we have this _structured_ data we want to train on. The source example data format is a [protobuff](https://developers.google.com/protocol-buffers). `struct2tensor` was built internally and works on protobuffers now. It can be extended to parquet, json, etc. in the future.\n", + "\n", + "```\n", + "# e.g. a web session\n", + "message Session{\n", + " message SessionInfo {\n", + " string session_feature = 1;\n", + " double session_duration_sec = 2;\n", + " }\n", + " SessionInfo session_info = 1;\n", + " message Event {\n", + " string query = 1;\n", + " message Action {\n", + " int number_of_views = 1;\n", + " }\n", + " repeated Action action = 2;\n", + " }\n", + " repeated Event event = 2;\n", + "}\n", + "```\n", + "\n", + "\n", + "In 3 steps we'll extract the fields we want with `struct2tensor`. We'll end up with batch-aligned `SparseTensors`:\n", + "\n", + "1. Tell our model what examples we care about, e.g. **`event`** (submessage `Session::Event`).\n", + "2. 
Pick the proto fields that we think are good features, say:\n", + " * `session_info.session_feature`\n", + " * `event.query`\n", + "3. Identify the label to predict, say **`event.action.number_of_views`** (the actual label could be sum(action.number_of_views for action in event))\n", + "\n", + "\n", + "Then we can build a struct2tensor query that:\n", + "* parses instances of this protocol buffer\n", + "* transforms the fields we care about\n", + "* creates the necessary `SparseTensor`s\n", + "\n", + "Don't worry about some of these terms yet. We'll show you an example. And then explain the terms later." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "w2RkJ6mN2Y6-" + }, + "source": [ + "## Install required packages (internal colab users: skip)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 461 + }, + "id": "q3REXR58msJe", + "outputId": "9b3a6130-83ce-46af-fe61-44a8cb95b4d8" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: struct2tensor in /usr/local/lib/python3.6/dist-packages (0.0.1.dev6)\n", + "Requirement already satisfied: tensorflow-metadata>=0.13.0 in /usr/local/lib/python3.6/dist-packages (from struct2tensor) (0.15.1)\n", + "Requirement already satisfied: protobuf>=3.8.0 in /usr/local/lib/python3.6/dist-packages (from struct2tensor) (3.10.0)\n", + "Requirement already satisfied: tensorflow==1.15.0 in /usr/local/lib/python3.6/dist-packages (from struct2tensor) (1.15.0)\n", + "Requirement already satisfied: googleapis-common-protos in /usr/local/lib/python3.6/dist-packages (from tensorflow-metadata>=0.13.0->struct2tensor) (1.6.0)\n", + "Requirement already satisfied: six>=1.9 in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.8.0->struct2tensor) (1.12.0)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.8.0->struct2tensor) (42.0.1)\n", + "Requirement already satisfied: numpy<2.0,>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0->struct2tensor) (1.17.4)\n", + "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0->struct2tensor) (3.1.0)\n", + "Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0->struct2tensor) (1.1.0)\n", + "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0->struct2tensor) (1.1.0)\n", + "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0->struct2tensor) (0.33.6)\n", + "Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0->struct2tensor) (1.15.0)\n", + "Requirement already satisfied: tensorboard<1.16.0,>=1.15.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0->struct2tensor) (1.15.0)\n", + "Requirement already satisfied: keras-applications>=1.0.8 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0->struct2tensor) (1.0.8)\n", + "Requirement already satisfied: tensorflow-estimator==1.15.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0->struct2tensor) (1.15.1)\n", + "Requirement already satisfied: google-pasta>=0.1.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0->struct2tensor) (0.1.8)\n", + "Requirement already 
satisfied: gast==0.2.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0->struct2tensor) (0.2.2)\n", + "Requirement already satisfied: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0->struct2tensor) (1.11.2)\n", + "Requirement already satisfied: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0->struct2tensor) (0.8.1)\n", + "Requirement already satisfied: astor>=0.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0->struct2tensor) (0.8.0)\n", + "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.16.0,>=1.15.0->tensorflow==1.15.0->struct2tensor) (0.16.0)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.16.0,>=1.15.0->tensorflow==1.15.0->struct2tensor) (3.1.1)\n", + "Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from keras-applications>=1.0.8->tensorflow==1.15.0->struct2tensor) (2.8.0)\n", + "Requirement already satisfied: graphviz in /usr/local/lib/python3.6/dist-packages (0.10.1)\n" + ] + } + ], + "source": [ + "#@test {\"skip\": true} \n", + "# install struct2tensor\n", + "!pip install struct2tensor\n", + "# graphviz for pretty output\n", + "!pip install graphviz" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dIxHSM3VQfUu" + }, + "source": [ + "## Some Pretty Printing and Imports\n", + "\n", + "(not the \"real\" work yet)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "code", + "colab": { + "height": 34 + }, + "executionInfo": { + "elapsed": 437, + "status": "ok", + "timestamp": 1600375610067, + "user": { + "displayName": "", + "photoUrl": "", + "userId": "" + }, + "user_tz": 420 + }, + "id": "lc5KF8MILYrS", + "outputId": "5b8c3534-db19-4f98-cc12-f44716bc402a", + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "type-specific pretty printing ready to go\n" + ] + } + ], + "source": [ + "import base64\n", + "import numpy as np\n", + "import pprint\n", + "import os\n", + "import tensorflow\n", + "from graphviz import Source\n", + "\n", + "import tensorflow as tf\n", + "\n", + "from IPython.display import Image\n", + "from IPython.lib import pretty\n", + "\n", + "import struct2tensor as s2t\n", + "from struct2tensor.test import test_pb2\n", + "from google.protobuf import text_format\n", + "\n", + "\n", + "def _display(graph):\n", + " \"\"\"Renders a graphviz digraph.\"\"\"\n", + " s = Source(graph)\n", + " s.format='svg'\n", + " return s\n", + " \n", + "\n", + "def _create_query_from_text_sessions(text_sessions):\n", + " \"\"\"Creates a struct2tensor query from a list of pbtxt of struct2tensor.test.Session.\"\"\"\n", + " sessions = tf.constant([\n", + " text_format.Merge(\n", + " text_session, \n", + " test_pb2.Session()\n", + " ).SerializeToString()\n", + " for text_session in text_sessions\n", + " ])\n", + " return s2t.create_expression_from_proto(\n", + " sessions, test_pb2.Session.DESCRIPTOR)\n", + "\n", + "def _prensor_pretty_printer(prensor, p, cycle):\n", + " \"\"\"Pretty printing function for struct2tensor.prensor.Prensor\"\"\"\n", + " pretty.pprint(prensor.get_sparse_tensors())\n", + "\n", + "def _sp_pretty_printer(sp, p, cycle):\n", + " \"\"\"Pretty printing function for SparseTensor.\"\"\"\n", + "\n", + " del cycle\n", + " p.begin_group(4, \"SparseTensor(\")\n", + " p.text(\"values={}, 
\".format(sp.values.numpy().tolist()))\n", + " p.text(\"dense_shape={}, \".format(sp.dense_shape.numpy().tolist()))\n", + " p.break_()\n", + " p.text(\"indices={}\".format(sp.indices.numpy().tolist()))\n", + " p.end_group(4, \")\")\n", + "\n", + "\n", + "pretty.for_type(tf.SparseTensor, _sp_pretty_printer)\n", + "pretty.for_type(s2t.Prensor, _prensor_pretty_printer)\n", + "\n", + "_pretty_print = pretty.pprint\n", + "\n", + "print(\"type-specific pretty printing ready to go\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sEsOWNuz7jC7" + }, + "source": [ + "## The real work:\n", + "\n", + "A function that parses our structured data (protobuffers) into tensors:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "height": 34 + }, + "executionInfo": { + "elapsed": 378, + "status": "ok", + "timestamp": 1600375613528, + "user": { + "displayName": "", + "photoUrl": "", + "userId": "" + }, + "user_tz": 420 + }, + "id": "ZC-oUzvBoPjA", + "outputId": "7a19c4e5-0cb1-479d-9245-e302a932448e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defined the workhorse func: (structured data at rest) -> (tensors)\n" + ] + } + ], + "source": [ + "@tf.function(input_signature=[tf.TensorSpec(shape=(None), dtype=tf.string)], autograph=False)\n", + "def parse_session(serialized_sessions):\n", + " \"\"\"A TF function parsing a batch of serialized Session protos into tensors.\n", + "\n", + " It is a TF graph that takes one 1-D tensor as input, and outputs a\n", + " Dict[str, tf.SparseTensor]\n", + " \"\"\"\n", + " query = s2t.create_expression_from_proto(\n", + " serialized_sessions, test_pb2.Session.DESCRIPTOR)\n", + " # Move all the fields of our interest to under \"event\". 
\n", + " query = query.promote_and_broadcast({\n", + " \"session_feature\": \"session_info.session_feature\",\n", + " \"action_number_of_views\": \"event.action.number_of_views\" },\n", + " \"event\")\n", + " # Specify \"event\" to be examples.\n", + " query = query.reroot(\"event\")\n", + " # Extract all the fields of our interest.\n", + " projection = query.project([\"session_feature\", \"query\", \"action_number_of_views\"]) \n", + " prensors = s2t.calculate_prensors([projection])\n", + " \n", + " output_sparse_tensors = {}\n", + " for prensor in prensors:\n", + " path_to_tensor = prensor.get_sparse_tensors()\n", + " output_sparse_tensors.update({str(k): v for k, v in path_to_tensor.items()})\n", + " \n", + " return output_sparse_tensors\n", + "\n", + "print(\"Defined the workhorse func: (structured data at rest) -> (tensors)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VKp4SxTgPzpe" + }, + "source": [ + "## Lets see it in action:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "height": 119 + }, + "executionInfo": { + "elapsed": 569, + "status": "ok", + "timestamp": 1600375616071, + "user": { + "displayName": "", + "photoUrl": "", + "userId": "" + }, + "user_tz": 420 + }, + "id": "-cIlFypdPeZX", + "outputId": "7840d783-249c-4be2-d675-10f8e817dded" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'action_number_of_views': SparseTensor(values=[1, 2, 3], dense_shape=[2, 2], \n", + " indices=[[0, 0], [1, 0], [1, 1]]),\n", + " 'query': SparseTensor(values=[b'Hello', b'world'], dense_shape=[2], \n", + " indices=[[0], [1]]),\n", + " 'session_feature': SparseTensor(values=[b'foo', b'foo'], dense_shape=[2, 1], \n", + " indices=[[0, 0], [1, 0]])}\n" + ] + } + ], + "source": [ + "serialized_sessions = tf.constant([\n", + " text_format.Merge(\n", + " \"\"\"\n", + " session_info {\n", + " session_duration_sec: 1.0\n", + " session_feature: \"foo\"\n", + " }\n", + " event {\n", + " query: \"Hello\"\n", + " action {\n", + " number_of_views: 1\n", + " }\n", + " action {\n", + " }\n", + " }\n", + " event {\n", + " query: \"world\"\n", + " action {\n", + " number_of_views: 2\n", + " }\n", + " action {\n", + " number_of_views: 3\n", + " }\n", + " }\n", + " \"\"\",\n", + " test_pb2.Session()\n", + " ).SerializeToString()\n", + "])\n", + "\n", + "_pretty_print(parse_session(serialized_sessions))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pW9zBjNMWMIH" + }, + "source": [ + "See how we went from our pre-pipeline data (the Protobuffer) all the way to the structured data, packed into `SparseTensor`s?" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QY7wy_6loF4v" + }, + "source": [ + "# Digging Far Deeper\n", + "Interested and want to learn more? Read on...\n", + "\n", + "Let's define several terms we mentioned before:\n", + "\n", + "### Prensor\n", + "\n", + "A Prensor (protobuffer + tensor) is a data structure storing the data we work on. We use protobuffers a lot at Google. `struct2tensor` can support other structured formats, too.\n", + "\n", + "For example, throughout this colab we will be using proto\n", + "[`struct2tensor.test.Session`](http://cs/symbol:struct2tensor.test.Session). 
A schematic visualization\n", + "of a selected part of the prensor from that proto looks like:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 543 + }, + "id": "ZTjNwx4bBXFk", + "outputId": "4927fdf8-0d2c-46e6-ee2f-a1b2bf4a298f" + }, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "%3\n", + "\n", + "\n", + "\n", + "root\n", + "\n", + "root\n", + "\n", + "\n", + "\n", + "session\n", + "\n", + "session\n", + "\n", + "\n", + "\n", + "root->session\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n", + "event\n", + "\n", + "event\n", + "\n", + "\n", + "\n", + "session->event\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n", + "session_id\n", + "\n", + "session_id\n", + "\n", + "\n", + "\n", + "session->session_id\n", + "\n", + "\n", + "?\n", + "\n", + "\n", + "\n", + "action\n", + "\n", + "action\n", + "\n", + "\n", + "\n", + "event->action\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n", + "query_token\n", + "\n", + "query_token\n", + "\n", + "\n", + "\n", + "event->query_token\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n", + "number_of_views\n", + "\n", + "number_of_views\n", + "\n", + "\n", + "\n", + "action->number_of_views\n", + "\n", + "\n", + "?\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "#@title { display-mode: \"form\" }\n", + "#@test {\"skip\": true} \n", + "\n", + "_display(\"\"\"\n", + "digraph {\n", + " root -> session [label=\"*\"];\n", + " session -> event [label=\"*\"];\n", + " session -> session_id [label=\"?\"];\n", + " event -> action [label=\"*\"];\n", + " event -> query_token [label=\"*\"]\n", + " action -> number_of_views [label=\"?\"];\n", + "}\n", + "\"\"\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P0aBK7NbO0wp" + }, + "source": [ + "We will be using visualizations like this to demostrate struct2tensor queries later.\n", + "\n", + "Note:\n", + "\n", + "* The \"*\" on the edge means the pointed node has repeated values; while the \"?\" means it has an optional value.\n", + "* There is always a \"root\" node whose only child is the root of the structure. 
Note that it's \"repeated\" because one struct2tensorTree can represent multiple instances of a structure.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5stZYn1dBXdl" + }, + "source": [ + "\n", + "### struct2tensor Query\n", + "A struct2tensor query transforms a Prensor into another Prensor.\n", + "\n", + "For example, `broadcast` is a query that replicates a node as a child of one of its siblings.\n", + "\n", + "Applying\n", + "```\n", + "broadcast(\n", + " source_path=\"session.session_id\",\n", + " sibling=\"event\",\n", + " new_field_name=\"session_session_id\")\n", + "```\n", + "\n", + "on the previous tree gives:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 543 + }, + "id": "zfhF-frzPxZm", + "outputId": "36bd1e53-befe-4bfa-e7da-737bd18a2611" + }, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "%3\n", + "\n", + "\n", + "\n", + "session_session_id\n", + "\n", + "session_session_id\n", + "\n", + "\n", + "\n", + "root\n", + "\n", + "root\n", + "\n", + "\n", + "\n", + "session\n", + "\n", + "session\n", + "\n", + "\n", + "\n", + "root->session\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n", + "event\n", + "\n", + "event\n", + "\n", + "\n", + "\n", + "session->event\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n", + "session_id\n", + "\n", + "session_id\n", + "\n", + "\n", + "\n", + "session->session_id\n", + "\n", + "\n", + "?\n", + "\n", + "\n", + "\n", + "event->session_session_id\n", + "\n", + "\n", + "?\n", + "\n", + "\n", + "\n", + "action\n", + "\n", + "action\n", + "\n", + "\n", + "\n", + "event->action\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n", + "query_token\n", + "\n", + "query_token\n", + "\n", + "\n", + "\n", + "event->query_token\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n", + "number_of_views\n", + "\n", + "number_of_views\n", + "\n", + "\n", + "\n", + "action->number_of_views\n", + "\n", + "\n", + "?\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "#@title { display-mode: \"form\" }\n", + "#@test {\"skip\": true} \n", + "\n", + "_display(\"\"\"\n", + "digraph {\n", + " session_session_id [color=\"red\"];\n", + " root -> session [label=\"*\"];\n", + " session -> event [label=\"*\"];\n", + " session -> session_id [label=\"?\"];\n", + " event -> action [label=\"*\"];\n", + " event -> session_session_id [label=\"?\"];\n", + " event -> query_token [label=\"*\"];\n", + " action -> number_of_views [label=\"?\"];\n", + "}\n", + "\"\"\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eOHokTjBPyHW" + }, + "source": [ + "We will talk about common struct2tensor queries in later sections.\n", + "\n", + "### Projection\n", + "A projection of paths in a Prensor produces another Prensor with just the selected paths.\n", + "\n", + "#### Logical representation of a projection\n", + "The structure of the projected path can be represented losslessly as nested lists. 
For example, the projection of `event.action.number_of_views` from the struct2tensorTree formed by the following two instances of `struct2tensor.test.Session`:\n", + "```\n", + "{\n", + " event { action { number_of_views: 1} action { number_of_views: 2} action {} }\n", + " event {}\n", + "}, {\n", + " event { action { number_of_views: 3} }\n", + "}\n", + "```\n", + "\n", + "is:\n", + "\n", + "```\n", + "[ # the outer list has two elements b/c there are two Session protos.\n", + " [ # the first proto has two events\n", + " [[1],[2],[]], # 3 actions, the last one does not have a number_of_views.\n", + " [], # the second event does not have action\n", + " ],\n", + " [ # the second proto has one event\n", + " [[3]],\n", + " ],\n", + "]\n", + "```\n", + "\n", + "#### Representing nested lists with `tf.SparseTensor`\n", + "\n", + "struct2tensor uses `tf.SparseTensor` to represent the above nested list in the projection results. Note that `tf.SparseTensor` essentially enforces that the lists nested at the same level to have the same length (because the there is a certain size for each dimension), therefore this representation is lossy. The above nested lists, when written as a SparseTensor will look like:\n", + "```\n", + "tf.SparseTensor(\n", + " dense_shape=[2, 2, 3, 1], # each is the maximum length of lists at the same nesting level.\n", + " values = [1, 2, 3],\n", + " indices = [[0, 0, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]]\n", + ")\n", + "```\n", + "\n", + "Note that the last dimension is useless: the index of that dimension will always be 0 for any present value because number_of_views is an optional field. So struct2tensors library will actually \"squeeze\" all the optional dimensions.\n", + "\n", + "The actual result would be:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "height": 68 + }, + "executionInfo": { + "elapsed": 352, + "status": "ok", + "timestamp": 1600368919353, + "user": { + "displayName": "", + "photoUrl": "", + "userId": "" + }, + "user_tz": 420 + }, + "id": "XvuEY3D3WIP7", + "outputId": "ee33d486-90e7-4328-accc-11f13f82a9db" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{event.action.number_of_views: SparseTensor(values=[1, 2, 3], dense_shape=[2, 2, 3], \n", + " indices=[[0, 0, 0], [0, 0, 1], [1, 0, 0]])}\n", + "]\n" + ] + } + ], + "source": [ + "query = _create_query_from_text_sessions(['''\n", + " event { action { number_of_views: 1} action { number_of_views: 2} action {} }\n", + " event {}\n", + " ''', '''\n", + " event { action { number_of_views: 3} }\n", + " ''']\n", + " ).project([\"event.action.number_of_views\"])\n", + "\n", + "prensor = s2t.calculate_prensors([query])\n", + "pretty.pprint(prensor)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iaailOJhWLLa" + }, + "source": [ + "struct2tensor's internal data model is closer to the above \"nested lists\" abstraction and sometimes it's easier to reason with \"nested lists\" than with `SparseTensor`s.\n", + "\n", + "Recently, [`tf.RaggedTensor`](https://www.tensorflow.org/guide/ragged_tensor) was introduced to represent nested lists exactly. We are working on adding support for projecting into ragged tensors." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UD48ikK4Eop-" + }, + "source": [ + "## Common struct2tensor Queries\n", + "\n", + "### `promote`\n", + "\n", + "Promotes a node to become a sibling of its parent. 
If the node is repeated, then all its values are concatenated (the order is preserved)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 426 + }, + "id": "tnbD2lKoDbsk", + "outputId": "244aeef6-48b7-4ae5-f01c-2429bb65a7ea" + }, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "%3\n", + "\n", + "\n", + "\n", + "root\n", + "\n", + "root\n", + "\n", + "\n", + "\n", + "session\n", + "\n", + "session\n", + "\n", + "\n", + "\n", + "root->session\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n", + "event\n", + "\n", + "event\n", + "\n", + "\n", + "\n", + "session->event\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n", + "query_token\n", + "\n", + "query_token\n", + "\n", + "\n", + "\n", + "event->query_token\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "#@title { display-mode: \"form\" }\n", + "#@test {\"skip\": true} \n", + "\n", + "_display('''\n", + "digraph {\n", + " root -> session [label=\"*\"];\n", + " session -> event [label=\"*\"];\n", + " event -> query_token [label=\"*\"];\n", + "}\n", + "''')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OOgJlcHPDyIk" + }, + "source": [ + "`promote(source_path=\"event.query_token\", new_field_name=\"event_query_token\")`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 426 + }, + "id": "xKJ_EraVHyUA", + "outputId": "64c3720d-3672-4625-d8c1-1a4b301d6a2d" + }, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "%3\n", + "\n", + "\n", + "\n", + "event_query_token\n", + "\n", + "event_query_token\n", + "\n", + "\n", + "\n", + "root\n", + "\n", + "root\n", + "\n", + "\n", + "\n", + "session\n", + "\n", + "session\n", + "\n", + "\n", + "\n", + "root->session\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n", + "session->event_query_token\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n", + "event\n", + "\n", + "event\n", + "\n", + "\n", + "\n", + "session->event\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n", + "query_token\n", + "\n", + "query_token\n", + "\n", + "\n", + "\n", + "event->query_token\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "#@title { display-mode: \"form\" }\n", + "#@test {\"skip\": true} \n", + "\n", + "_display('''\n", + "digraph {\n", + " event_query_token [color=\"red\"];\n", + " root -> session [label=\"*\"];\n", + " session -> event [label=\"*\"];\n", + " session -> event_query_token [label=\"*\"];\n", + " event -> query_token [label=\"*\"];\n", + "}\n", + "''')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "height": 68 + }, + "executionInfo": { + "elapsed": 344, + "status": "ok", + "timestamp": 1600296594869, + "user": { + "displayName": "", + "photoUrl": "", + "userId": "" + }, + "user_tz": 420 + }, + "id": "oQCcVWd-JDT9", + "outputId": "acad1b9a-0985-46bb-b895-7d085814d20e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{event_query_token: SparseTensor(values=[b'abc', b'def', b'ghi'], 
dense_shape=[1, 3], \n", + " indices=[[0, 0], [0, 1], [0, 2]])}\n", + "]\n" + ] + } + ], + "source": [ + "query = (_create_query_from_text_sessions([\n", + "\"\"\"\n", + "event {\n", + " query_token: \"abc\"\n", + " query_token: \"def\"\n", + "}\n", + "event {\n", + " query_token: \"ghi\"\n", + "}\n", + "\"\"\"])\n", + " .promote(source_path=\"event.query_token\", new_field_name=\"event_query_token\")\n", + " .project([\"event_query_token\"]))\n", + "\n", + "prensor = s2t.calculate_prensors([query])\n", + "\n", + "_pretty_print(prensor)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "av9km5d_KM8o" + }, + "source": [ + "The projected structure is like:\n", + "```\n", + "{\n", + " # this is under Session.\n", + " event_query_token: \"abc\"\n", + " event_query_token: \"def\"\n", + " event_query_token: \"ghi\"\n", + "}\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "t2BU8warKqFm" + }, + "source": [ + "### `broadcast`\n", + "\n", + "Broadcasts the value of a node to one of its sibling. The value will be replicated if the sibling is repeated. This is similar to TensorFlow and Numpy's [broadcasting semantics](https://docs.scipy.org/doc/numpy-1.13.0/user/basics.broadcasting.html).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 311 + }, + "id": "1hFGEV0DMmOo", + "outputId": "ff23c5ce-17c9-4604-f309-c9b384b6d8ca" + }, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "%3\n", + "\n", + "\n", + "\n", + "root\n", + "\n", + "root\n", + "\n", + "\n", + "\n", + "session\n", + "\n", + "session\n", + "\n", + "\n", + "\n", + "root->session\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n", + "session_id\n", + "\n", + "session_id\n", + "\n", + "\n", + "\n", + "session->session_id\n", + "\n", + "\n", + "?\n", + "\n", + "\n", + "\n", + "event\n", + "\n", + "event\n", + "\n", + "\n", + "\n", + "session->event\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "#@title { display-mode: \"form\" }\n", + "#@test {\"skip\": true} \n", + "\n", + "_display('''\n", + "digraph {\n", + " root -> session [label=\"*\"];\n", + " session -> session_id [label=\"?\"];\n", + " session -> event [label=\"*\"];\n", + "}\n", + "''')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DvBCsWBuNDKV" + }, + "source": [ + "`broadcast(source_path=\"session_id\", sibling_field=\"event\", new_field_name=\"session_session_id\")`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 426 + }, + "id": "wTUHyGvSNMGp", + "outputId": "d1ef4902-72eb-4bd2-e0bb-3008a9de17e7" + }, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "%3\n", + "\n", + "\n", + "\n", + "session_session_id\n", + "\n", + "session_session_id\n", + "\n", + "\n", + "\n", + "root\n", + "\n", + "root\n", + "\n", + "\n", + "\n", + "session\n", + "\n", + "session\n", + "\n", + "\n", + "\n", + "root->session\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n", + "session_id\n", + "\n", + "session_id\n", + "\n", + "\n", + "\n", + "session->session_id\n", + "\n", + "\n", + "?\n", + "\n", + "\n", + "\n", + "event\n", + "\n", + "event\n", + "\n", + "\n", + "\n", + 
"session->event\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n", + "event->session_session_id\n", + "\n", + "\n", + "?\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "#@title { display-mode: \"form\" }\n", + "#@test {\"skip\": true} \n", + "\n", + "_display('''\n", + "digraph {\n", + " session_session_id [color=\"red\"];\n", + " root -> session [label=\"*\"];\n", + " session -> session_id [label=\"?\"];\n", + " session -> event [label=\"*\"];\n", + " event -> session_session_id [label=\"?\"];\n", + "}\n", + "''')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "height": 68 + }, + "executionInfo": { + "elapsed": 341, + "status": "ok", + "timestamp": 1600296607633, + "user": { + "displayName": "", + "photoUrl": "", + "userId": "" + }, + "user_tz": 420 + }, + "id": "wQE_ceMzNjzv", + "outputId": "d92d9977-1e4c-4138-e89c-94e9c0f4bf5e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{event.session_session_id: SparseTensor(values=[8, 8], dense_shape=[1, 2], \n", + " indices=[[0, 0], [0, 1]])}\n", + "]\n" + ] + } + ], + "source": [ + "query = (_create_query_from_text_sessions([\n", + "\"\"\"\n", + "session_id: 8\n", + "event { }\n", + "event { }\n", + "\"\"\"])\n", + " .broadcast(source_path=\"session_id\",\n", + " sibling_field=\"event\",\n", + " new_field_name=\"session_session_id\")\n", + " .project([\"event.session_session_id\"]))\n", + "\n", + "prensor = s2t.calculate_prensors([query])\n", + "_pretty_print(prensor)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7fL8pmsQObUT" + }, + "source": [ + "The projected structure is like:\n", + "```\n", + "{\n", + " event {\n", + " session_session_id: 8\n", + " }\n", + " event {\n", + " session_session_id: 8\n", + " }\n", + "}\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ch2WgR9AP23Q" + }, + "source": [ + "### `promote_and_broadcast`\n", + "The query accepts multiple source fields and a destination field. For each source field, it first promotes it to the least common ancestor with the destination field (if necessary), then broadcasts it to the destination field (if necessary).\n", + "\n", + "Usually for the purpose of machine learning, this gives a reasonable flattened representation of nested structures." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-uNfasxgRsvO" + }, + "source": [ + "```\n", + "promote_and_broadcast(\n", + " path_dictionary={\n", + " 'session_info_duration_sec': 'session_info.session_duration_sec'},\n", + " dest_path_parent='event.action')\n", + "```\n", + "is equivalent to:\n", + "```\n", + "promote(source_path='session_info.session_duration_sec',\n", + " new_field_name='anonymous_field1')\n", + "\n", + "broadcast(source_path='anonymous_field1',\n", + " sibling_field='event.action',\n", + " new_field_name='session_info_duration_sec')\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rYkWc_u8SR_k" + }, + "source": [ + "### `map_field_values`\n", + "\n", + "Creates a new node that is a sibling of a leaf node. 
The values of the new node are results of applying the given function to the values of the source node.\n", + "\n", + "Note that the function provided takes 1-D tensor that contains all the values of the source node as input and should also output a 1-D tensor of the same size, and it should build TF ops.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "height": 68 + }, + "executionInfo": { + "elapsed": 333, + "status": "ok", + "timestamp": 1600296617311, + "user": { + "displayName": "", + "photoUrl": "", + "userId": "" + }, + "user_tz": 420 + }, + "id": "DWjA2OFcS4k1", + "outputId": "7a2d8ef5-7111-4b45-81dd-65d8222badae" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{session_id_plus_one: SparseTensor(values=[9, 10], dense_shape=[2], \n", + " indices=[[0], [1]])}\n", + "]\n" + ] + } + ], + "source": [ + "query = (_create_query_from_text_sessions([\n", + "\"\"\"\n", + "session_id: 8\n", + "\"\"\",\n", + "\"\"\"\n", + "session_id: 9\n", + "\"\"\"])\n", + " .map_field_values(\"session_id\", lambda x: tf.add(x, 1), dtype=tf.int64,\n", + " new_field_name=\"session_id_plus_one\")\n", + " .project([\"session_id_plus_one\"]))\n", + " \n", + "prensor = s2t.calculate_prensors([query])\n", + "\n", + "_pretty_print(prensor)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_NusnDa1T7s5" + }, + "source": [ + "### `reroot`\n", + "\n", + "Makes the given node the new root of the struct2tensorTree. This has two effects:\n", + "\n", + "* restricts the scope of the struct2tensorTree\n", + " + The field paths in all the following queries are relative to the new root\n", + " + There's no way to refer to nodes that are outside the subtree rooted at the new root.\n", + "* changes the batch dimension.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 426 + }, + "id": "OBNPdOlQU6qS", + "outputId": "df56d6d2-72c5-4c90-e2b9-b3fe2bffde00" + }, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "%3\n", + "\n", + "\n", + "\n", + "root\n", + "\n", + "root\n", + "\n", + "\n", + "\n", + "session\n", + "\n", + "session\n", + "\n", + "\n", + "\n", + "root->session\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n", + "session_id\n", + "\n", + "session_id\n", + "\n", + "\n", + "\n", + "session->session_id\n", + "\n", + "\n", + "?\n", + "\n", + "\n", + "\n", + "event\n", + "\n", + "event\n", + "\n", + "\n", + "\n", + "session->event\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n", + "event_id\n", + "\n", + "event_id\n", + "\n", + "\n", + "\n", + "event->event_id\n", + "\n", + "\n", + "?\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "#@title { display-mode: \"form\" }\n", + "#@test {\"skip\": true} \n", + "\n", + "_display('''\n", + "digraph {\n", + " root -> session [label=\"*\"];\n", + " session -> session_id [label=\"?\"];\n", + " session -> event [label=\"*\"];\n", + " event -> event_id [label=\"?\"];\n", + "}\n", + "''')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HFlI3zMtU6Ev" + }, + "source": [ + "`reroot(\"event\")`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 311 + }, + "id": "uANFWQA-T68W", + "outputId": 
"659a11b1-39bb-456d-de90-b2675182ca25" + }, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "%3\n", + "\n", + "\n", + "\n", + "root\n", + "\n", + "root\n", + "\n", + "\n", + "\n", + "event\n", + "\n", + "event\n", + "\n", + "\n", + "\n", + "root->event\n", + "\n", + "\n", + "*\n", + "\n", + "\n", + "\n", + "event_id\n", + "\n", + "event_id\n", + "\n", + "\n", + "\n", + "event->event_id\n", + "\n", + "\n", + "?\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 16, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "#@title { display-mode: \"form\" }\n", + "#@test {\"skip\": true} \n", + "\n", + "_display('''\n", + "digraph {\n", + " root -> event [label=\"*\"];\n", + " event -> event_id [label=\"?\"];\n", + "}\n", + "''')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "height": 459 + }, + "executionInfo": { + "elapsed": 408, + "status": "ok", + "timestamp": 1600297140484, + "user": { + "displayName": "", + "photoUrl": "", + "userId": "" + }, + "user_tz": 420 + }, + "id": "lMoeHHllVeet", + "outputId": "631afc7a-ebb3-456f-d0a1-11eeeccee95c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Assume the following Sessions: \n", + "[session_id: 1\n", + "event {\n", + " event_id: \"a\"\n", + "}\n", + "event {\n", + " event_id: \"b\"\n", + "}\n", + ", session_id: 2\n", + ", session_id: 3\n", + "event {\n", + " event_id: \"c\"\n", + "}\n", + "]\n", + "\n", + "\n", + "project([\"event.event_id\"]) before reroot() (the batch dimension is the index to sessions):\n", + "[{event.event_id: SparseTensor(values=[b'a', b'b', b'c'], dense_shape=[3, 2], \n", + " indices=[[0, 0], [0, 1], [2, 0]])}\n", + "]\n", + "\n", + "\n", + "project([\"event_id\"]) after reroot() (the batch dimension becomes the index to events):\n", + "[{event_id: SparseTensor(values=[b'a', b'b', b'c'], dense_shape=[3], \n", + " indices=[[0], [1], [2]])}\n", + "]\n" + ] + } + ], + "source": [ + "#@title { display-mode: \"form\" }\n", + "text_protos = [\"\"\"\n", + "session_id: 1\n", + "event {\n", + " event_id: \"a\"\n", + "}\n", + "event {\n", + " event_id: \"b\"\n", + "}\n", + "\"\"\",\n", + "\"\"\"\n", + "session_id: 2\n", + "\"\"\",\n", + "\"\"\"\n", + "session_id: 3\n", + "event {\n", + " event_id: \"c\"\n", + "}\n", + "\"\"\"\n", + "]\n", + "print(\"\"\"Assume the following Sessions: \"\"\")\n", + "print([text_format.Merge(p, s2t.test.test_pb2.Session()) for p in text_protos])\n", + "print(\"\\n\")\n", + "reroot_example_query = _create_query_from_text_sessions(text_protos)\n", + "\n", + "print(\"\"\"project([\"event.event_id\"]) before reroot() (the batch dimension is the index to sessions):\"\"\")\n", + "_pretty_print(s2t.calculate_prensors([reroot_example_query.project([\"event.event_id\"])]))\n", + "print(\"\\n\")\n", + "print(\"\"\"project([\"event_id\"]) after reroot() (the batch dimension becomes the index to events):\"\"\")\n", + "_pretty_print(s2t.calculate_prensors([reroot_example_query.reroot(\"event\").project([\"event_id\"])]))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "70nd50UgLlQU" + }, + "source": [ + "### Proto Map\n", + "\n", + "You can specify a key for the proto map field in a path via brackets.\n", + "\n", + "Given the following tf.Example:\n", + "```\n", + "features {\n", + " feature {\n", + " key: \"my_feature\"\n", + " value {\n", + " float_list {\n", + " value: 1.0\n", + " 
}\n", + " }\n", + " }\n", + " feature {\n", + " key: \"other_feature\"\n", + " value {\n", + " bytes_list {\n", + " value: \"my_val\"\n", + " }\n", + " }\n", + " }\n", + "}\n", + "```\n", + "\n", + "To get the values of `my_feature` and `other_feature`, we can `promote_and_broadcast` and `project` the following paths: `features.feature[my_feature].float_list.value` and `features.feature[other_feature].bytes_list.value`\n", + "\n", + "This results in the following dict of ragged tensors: \n", + "```\n", + "{\n", + " features.my_new_feature: ,\n", + " features.other_new_feature: \n", + "}\n", + "```\n", + "\n", + "Note: we renamed `my_feature` to `my_new_feature` in the `promote_and_broadcast` (and similarly for `other_feature`)." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "executionInfo": { + "elapsed": 111, + "status": "ok", + "timestamp": 1632331356173, + "user": { + "displayName": "", + "photoUrl": "", + "userId": "" + }, + "user_tz": 420 + }, + "id": "9ESrpLsEL4SO", + "outputId": "ba002544-d4b2-4ae2-f343-f1f25ab91a4d" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{features.my_new_feature: , features.other_new_feature: }\n" + ] + } + ], + "source": [ + "tf_example = text_format.Parse(\"\"\"\n", + "features {\n", + " feature {\n", + " key: \"my_feature\"\n", + " value {\n", + " float_list {\n", + " value: 1.0\n", + " }\n", + " }\n", + " }\n", + " feature {\n", + " key: \"other_feature\"\n", + " value {\n", + " bytes_list {\n", + " value: \"my_val\"\n", + " }\n", + " }\n", + " }\n", + "}\n", + "\"\"\", tf.train.Example())\n", + "\n", + "query = s2t.create_expression_from_proto(\n", + " tf_example.SerializeToString(), tf.train.Example.DESCRIPTOR)\n", + "query = query.promote_and_broadcast({'my_new_feature': \"features.feature[my_feature].float_list.value\", \"other_new_feature\": \"features.feature[other_feature].bytes_list.value\"}, \"features\")\n", + "query = query.project([\"features.my_new_feature\", \"features.other_new_feature\"])\n", + "[prensor] = s2t.calculate_prensors([query])\n", + "ragged_tensors = prensor.get_ragged_tensors()\n", + "\n", + "print(ragged_tensors)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UxLZtkdmM7lj" + }, + "source": [ + "## Apache Parquet Support\n", + "\n", + "`struct2tensor` offers an [Apache Parquet](https://parquet.apache.org/) [tf.DataSet](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) that allows reading from a Parquet file and apply queries to manipulate the structure of the data.\n", + "\n", + "Because of the powerful struct2tensor library, the dataset will only read the Parquet columns that are required. This reduces I/O cost if we only need a select few columns." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7IHo8w3HMuyT" + }, + "source": [ + "### Preparation\n", + "\n", + "Please run the code cell at [Some Pretty Printing and Imports](#some-pretty-printing-and-imports) to ensure that all required modules are imported, and that pretty print works properly.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Vu02cwvfXVpf" + }, + "source": [ + "#### Prepare the input data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "id": "xEDieagzNL-9", + "outputId": "773de200-cd49-496f-ff3f-60074be06bca" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 1657 100 1657 0 0 8122 0 --:--:-- --:--:-- --:--:-- 8122\n" + ] + } + ], + "source": [ + "# Download our sample data file from the struct2tensor repository. The desciption of the data is below.\n", + "#@test {\"skip\": true} \n", + "\n", + "!curl -o dremel_example.parquet 'https://raw.githubusercontent.com/google/struct2tensor/master/struct2tensor/testdata/parquet_testdata/dremel_example.parquet'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Gn5J62JkNN-P" + }, + "source": [ + "### Example\n", + "\n", + "We will use a sample Parquet data file (*dremel_example.parquet*), which contains data based on the example used in this paper: https://storage.googleapis.com/pub-tools-public-publication-data/pdf/36632.pdf \n", + "\n", + "The file *dremel_example.parquet* has the following schema:\n", + "\n", + "```\n", + "message Document {\n", + " required int64 DocId;\n", + " optional group Links {\n", + " repeated int64 Backward;\n", + " repeated int64 Forward; }\n", + " repeated group Name {\n", + " repeated group Language {\n", + " required string Code;\n", + " optional string Country; }\n", + " optional string Url; }}\n", + " ```\n", + "\n", + "and contains the following data:\n", + " \n", + "\n", + "```\n", + "Document\n", + " DocId: 10\n", + " Links\n", + " Forward: 20\n", + " Forward: 40\n", + " Forward: 60\n", + " Name\n", + " Language\n", + " Code: 'en-us'\n", + " Country: 'us'\n", + " Language\n", + " Code: 'en'\n", + " Url: 'http://A'\n", + " Name\n", + " Url: 'http://B'\n", + " Name\n", + " Language\n", + " Code: 'en-gb'\n", + " Country: 'gb'\n", + "Document\n", + " DocId: 20\n", + " Links\n", + " Backward: 10\n", + " Backward: 30\n", + " Forward: 80\n", + " Name\n", + " Url: 'http://C'\n", + "```\n", + "\n", + "\n", + "In this example, we will promote and broadcast the field `Links.Forward` and project it.\n", + "\n", + "batch_size will be the number of records (`Document`) per prensor. This works with optional and repeated fields, and will be able to batch the entire record.\n", + "\n", + "Feel free to try `batch_size = 2` in the below code. 
(Note this parquet file only has 2 records (`Document`) total).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 442 + }, + "id": "A8RyaU3EX4av", + "outputId": "9235bd5b-cca6-45e0-8244-0f58d6bdcd9b" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/struct2tensor/expression_impl/parquet.py:65: FutureWarning: The 'field_by_name' method is deprecated, use 'field' instead\n", + " [arrow_schema.field_by_name(name) for name in arrow_schema.names]))\n", + "/usr/local/lib/python3.6/dist-packages/struct2tensor/expression_impl/parquet.py:396: FutureWarning: The 'field_by_name' method is deprecated, use 'field' instead\n", + " for step in curr_steps_as_set\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "============================\n", + "Schema of new_field prensor: \n", + "RootNodeTensor root\n", + " repeated ChildNodeTensor Name\n", + " repeated new_field\n", + "\n", + "Sparse tensor representation: \n", + "{Name.new_field: SparseTensor(values=[20, 40, 60, 20, 40, 60, 20, 40, 60], dense_shape=[1, 3, 3], \n", + " indices=[[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 1, 0], [0, 1, 1], [0, 1, 2], [0, 2, 0], [0, 2, 1], [0, 2, 2]])}\n", + "\n", + "============================\n", + "Schema of new_field prensor: \n", + "RootNodeTensor root\n", + " repeated ChildNodeTensor Name\n", + " repeated new_field\n", + "\n", + "Sparse tensor representation: \n", + "{Name.new_field: SparseTensor(values=[80], dense_shape=[1, 1, 1], \n", + " indices=[[0, 0, 0]])}\n", + "\n", + "============================\n" + ] + } + ], + "source": [ + "#@test {\"skip\": true} \n", + "\n", + "from struct2tensor import expression_impl\n", + "\n", + "filenames = [\"dremel_example.parquet\"]\n", + "batch_size = 1\n", + "\n", + "exp = s2t.expression_impl.parquet.create_expression_from_parquet_file(filenames)\n", + "new_exp = exp.promote_and_broadcast({\"new_field\": \"Links.Forward\"}, \"Name\")\n", + "proj_exp = new_exp.project([\"Name.new_field\"])\n", + "proj_exp_needed = exp.project([\"Name.Url\"]) \n", + "# Please note that currently, proj_exp_needed needs to be passed into calculate.\n", + "# This is due to the way data is stored in parquet (values and repetition & \n", + "# definition levels). 
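(These levels\n",
+    "# encode the nesting and presence of each value within a record.) 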
To construct the node for \"Name\", we need to read the \n", + "# values of a column containing \"Name\".\n", + "pqds = s2t.expression_impl.parquet.calculate_parquet_values([proj_exp, proj_exp_needed], exp, \n", + " filenames, batch_size)\n", + "\n", + "for prensors in pqds:\n", + " new_field_prensor = prensors[0]\n", + " print(\"============================\")\n", + " print(\"Schema of new_field prensor: \")\n", + " print(new_field_prensor)\n", + " print(\"\\nSparse tensor representation: \")\n", + " pretty.pprint(new_field_prensor)\n", + "print(\"============================\")" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "last_runtime": { + "kind": "private" + }, + "name": "struct2tensor: ML on structured data", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.20" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/index.md b/docs/index.md new file mode 120000 index 0000000..32d46ee --- /dev/null +++ b/docs/index.md @@ -0,0 +1 @@ +../README.md \ No newline at end of file diff --git a/docs/javascripts/mathjax.js b/docs/javascripts/mathjax.js new file mode 100644 index 0000000..0be88e0 --- /dev/null +++ b/docs/javascripts/mathjax.js @@ -0,0 +1,19 @@ +window.MathJax = { + tex: { + inlineMath: [["\\(", "\\)"]], + displayMath: [["\\[", "\\]"]], + processEscapes: true, + processEnvironments: true + }, + options: { + ignoreHtmlClass: ".*|", + processHtmlClass: "arithmatex" + } +}; + +document$.subscribe(() => { + MathJax.startup.output.clearCache() + MathJax.typesetClear() + MathJax.texReset() + MathJax.typesetPromise() +}) diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css new file mode 100644 index 0000000..2caada4 --- /dev/null +++ b/docs/stylesheets/extra.css @@ -0,0 +1,38 @@ +:root { + --md-primary-fg-color: #FFA800; + --md-primary-fg-color--light: #CCCCCC; + --md-primary-fg-color--dark: #425066; +} + +.video-wrapper { + max-width: 240px; + display: flex; + flex-direction: row; +} +.video-wrapper > iframe { + width: 100%; + aspect-ratio: 16 / 9; +} + +p img { + background: white; +} + +.buttons-wrapper {} + +.buttons-wrapper > a { + justify-content: center; + align-content: center; + flex-wrap: nowrap; + align-items: center; + text-align: center; + flex: 1 1 30%; + display: flex; +} + +.md-button > .buttons-content { + align-items: center; + justify-content: center; + display: flex; + gap: 1em; +} diff --git a/examples/prensor_playground.ipynb b/examples/prensor_playground.ipynb deleted file mode 100644 index 1d382d3..0000000 --- a/examples/prensor_playground.ipynb +++ /dev/null @@ -1,1614 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "PHuwKiTF02Zq" - }, - "source": [ - "# Your structured data into Tensorflow.\n", - "\n", - "ML training often expects _flat_ data, like a line in a CSV.\n", - "[tf.Example](https://www.tensorflow.org/api_docs/python/tf/train/Example) was\n", - "designed to represent flat data. But the data you care about and want to predict\n", - "things about usually starts out _structured_.\n", - "\n", - "Over and over again you have to write transform code that turns your structured data into Tensors. 
This repetitive transform code must be rewritten over and over for all your ML pipelines both for training _and_ serving! And it lets bugs slip into your ML pipeline. \n", - "\n", - "`struct2tensor` lets you take advantage of structured data _within_ your ML pipelines. It is:\n", - "\n", - "* **for**: ML Engineers \n", - "* **who**: train models on data that starts out structured\n", - "* **it is**: a python library \n", - "* **that**: transforms your structured data into model-friendly (Sparse, Raggged, Dense, ...) tensors hermetically _within_ your model\n", - "* **unlike**: writing custom transforms over and over for training and serving.\n", - "\n", - "\n", - "---\n", - "\n", - "\n", - "![struct2tensor diagram showing the transform happens in the model](https://imgur.com/aqOX7nS.png)\n", - "\n", - "\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7ozCmoF33ogj" - }, - "source": [ - "# Demo example\n", - "\n", - "Suppose we have this _structured_ data we want to train on. The source example data format is a [protobuff](https://developers.google.com/protocol-buffers). `struct2tensor` was built internally and works on protobuffers now. It can be extended to parquet, json, etc. in the future.\n", - "\n", - "```\n", - "# e.g. a web session\n", - "message Session{\n", - " message SessionInfo {\n", - " string session_feature = 1;\n", - " double session_duration_sec = 2;\n", - " }\n", - " SessionInfo session_info = 1;\n", - " message Event {\n", - " string query = 1;\n", - " message Action {\n", - " int number_of_views = 1;\n", - " }\n", - " repeated Action action = 2;\n", - " }\n", - " repeated Event event = 2;\n", - "}\n", - "```\n", - "\n", - "\n", - "In 3 steps we'll extract the fields we want with `struct2tensor`. We'll end up with batch-aligned `SparseTensors`:\n", - "\n", - "1. Tell our model what examples we care about, e.g. **`event`** (submessage `Session::Event`).\n", - "2. Pick the proto fields that we think are good features, say:\n", - " * `session_info.session_feature`\n", - " * `event.query`\n", - "3. Identify the label to predict, say **`event.action.number_of_views`** (the actual label could be sum(action.number_of_views for action in event))\n", - "\n", - "\n", - "Then we can build a struct2tensor query that:\n", - "* parses instances of this protocol buffer\n", - "* transforms the fields we care about\n", - "* creates the necessary `SparseTensor`s\n", - "\n", - "Don't worry about some of these terms yet. We'll show you an example. And then explain the terms later." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "w2RkJ6mN2Y6-" - }, - "source": [ - "## Install required packages (internal colab users: skip)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 461 - }, - "id": "q3REXR58msJe", - "outputId": "9b3a6130-83ce-46af-fe61-44a8cb95b4d8" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: struct2tensor in /usr/local/lib/python3.6/dist-packages (0.0.1.dev6)\n", - "Requirement already satisfied: tensorflow-metadata\u003e=0.13.0 in /usr/local/lib/python3.6/dist-packages (from struct2tensor) (0.15.1)\n", - "Requirement already satisfied: protobuf\u003e=3.8.0 in /usr/local/lib/python3.6/dist-packages (from struct2tensor) (3.10.0)\n", - "Requirement already satisfied: tensorflow==1.15.0 in /usr/local/lib/python3.6/dist-packages (from struct2tensor) (1.15.0)\n", - "Requirement already satisfied: googleapis-common-protos in /usr/local/lib/python3.6/dist-packages (from tensorflow-metadata\u003e=0.13.0-\u003estruct2tensor) (1.6.0)\n", - "Requirement already satisfied: six\u003e=1.9 in /usr/local/lib/python3.6/dist-packages (from protobuf\u003e=3.8.0-\u003estruct2tensor) (1.12.0)\n", - "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf\u003e=3.8.0-\u003estruct2tensor) (42.0.1)\n", - "Requirement already satisfied: numpy\u003c2.0,\u003e=1.16.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0-\u003estruct2tensor) (1.17.4)\n", - "Requirement already satisfied: opt-einsum\u003e=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0-\u003estruct2tensor) (3.1.0)\n", - "Requirement already satisfied: keras-preprocessing\u003e=1.0.5 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0-\u003estruct2tensor) (1.1.0)\n", - "Requirement already satisfied: termcolor\u003e=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0-\u003estruct2tensor) (1.1.0)\n", - "Requirement already satisfied: wheel\u003e=0.26 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0-\u003estruct2tensor) (0.33.6)\n", - "Requirement already satisfied: grpcio\u003e=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0-\u003estruct2tensor) (1.15.0)\n", - "Requirement already satisfied: tensorboard\u003c1.16.0,\u003e=1.15.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0-\u003estruct2tensor) (1.15.0)\n", - "Requirement already satisfied: keras-applications\u003e=1.0.8 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0-\u003estruct2tensor) (1.0.8)\n", - "Requirement already satisfied: tensorflow-estimator==1.15.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0-\u003estruct2tensor) (1.15.1)\n", - "Requirement already satisfied: google-pasta\u003e=0.1.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0-\u003estruct2tensor) (0.1.8)\n", - "Requirement already satisfied: gast==0.2.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0-\u003estruct2tensor) (0.2.2)\n", - "Requirement already satisfied: wrapt\u003e=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0-\u003estruct2tensor) (1.11.2)\n", - "Requirement already satisfied: absl-py\u003e=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0-\u003estruct2tensor) (0.8.1)\n", - "Requirement already satisfied: 
astor\u003e=0.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow==1.15.0-\u003estruct2tensor) (0.8.0)\n", - "Requirement already satisfied: werkzeug\u003e=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard\u003c1.16.0,\u003e=1.15.0-\u003etensorflow==1.15.0-\u003estruct2tensor) (0.16.0)\n", - "Requirement already satisfied: markdown\u003e=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard\u003c1.16.0,\u003e=1.15.0-\u003etensorflow==1.15.0-\u003estruct2tensor) (3.1.1)\n", - "Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from keras-applications\u003e=1.0.8-\u003etensorflow==1.15.0-\u003estruct2tensor) (2.8.0)\n", - "Requirement already satisfied: graphviz in /usr/local/lib/python3.6/dist-packages (0.10.1)\n" - ] - } - ], - "source": [ - "#@test {\"skip\": true} \n", - "# install struct2tensor\n", - "!pip install struct2tensor\n", - "# graphviz for pretty output\n", - "!pip install graphviz" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dIxHSM3VQfUu" - }, - "source": [ - "## Some Pretty Printing and Imports\n", - "\n", - "(not the \"real\" work yet)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "code", - "colab": { - "height": 34 - }, - "executionInfo": { - "elapsed": 437, - "status": "ok", - "timestamp": 1600375610067, - "user": { - "displayName": "", - "photoUrl": "", - "userId": "" - }, - "user_tz": 420 - }, - "id": "lc5KF8MILYrS", - "outputId": "5b8c3534-db19-4f98-cc12-f44716bc402a", - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type-specific pretty printing ready to go\n" - ] - } - ], - "source": [ - "import base64\n", - "import numpy as np\n", - "import pprint\n", - "import os\n", - "import tensorflow\n", - "from graphviz import Source\n", - "\n", - "import tensorflow as tf\n", - "\n", - "from IPython.display import Image\n", - "from IPython.lib import pretty\n", - "\n", - "import struct2tensor as s2t\n", - "from struct2tensor.test import test_pb2\n", - "from google.protobuf import text_format\n", - "\n", - "\n", - "def _display(graph):\n", - " \"\"\"Renders a graphviz digraph.\"\"\"\n", - " s = Source(graph)\n", - " s.format='svg'\n", - " return s\n", - " \n", - "\n", - "def _create_query_from_text_sessions(text_sessions):\n", - " \"\"\"Creates a struct2tensor query from a list of pbtxt of struct2tensor.test.Session.\"\"\"\n", - " sessions = tf.constant([\n", - " text_format.Merge(\n", - " text_session, \n", - " test_pb2.Session()\n", - " ).SerializeToString()\n", - " for text_session in text_sessions\n", - " ])\n", - " return s2t.create_expression_from_proto(\n", - " sessions, test_pb2.Session.DESCRIPTOR)\n", - "\n", - "def _prensor_pretty_printer(prensor, p, cycle):\n", - " \"\"\"Pretty printing function for struct2tensor.prensor.Prensor\"\"\"\n", - " pretty.pprint(prensor.get_sparse_tensors())\n", - "\n", - "def _sp_pretty_printer(sp, p, cycle):\n", - " \"\"\"Pretty printing function for SparseTensor.\"\"\"\n", - "\n", - " del cycle\n", - " p.begin_group(4, \"SparseTensor(\")\n", - " p.text(\"values={}, \".format(sp.values.numpy().tolist()))\n", - " p.text(\"dense_shape={}, \".format(sp.dense_shape.numpy().tolist()))\n", - " p.break_()\n", - " p.text(\"indices={}\".format(sp.indices.numpy().tolist()))\n", - " p.end_group(4, \")\")\n", - "\n", - "\n", - "pretty.for_type(tf.SparseTensor, _sp_pretty_printer)\n", - "pretty.for_type(s2t.Prensor, _prensor_pretty_printer)\n", - "\n", - 
"_pretty_print = pretty.pprint\n", - "\n", - "print(\"type-specific pretty printing ready to go\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sEsOWNuz7jC7" - }, - "source": [ - "## The real work:\n", - "\n", - "A function that parses our structured data (protobuffers) into tensors:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "height": 34 - }, - "executionInfo": { - "elapsed": 378, - "status": "ok", - "timestamp": 1600375613528, - "user": { - "displayName": "", - "photoUrl": "", - "userId": "" - }, - "user_tz": 420 - }, - "id": "ZC-oUzvBoPjA", - "outputId": "7a19c4e5-0cb1-479d-9245-e302a932448e" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Defined the workhorse func: (structured data at rest) -\u003e (tensors)\n" - ] - } - ], - "source": [ - "@tf.function(input_signature=[tf.TensorSpec(shape=(None), dtype=tf.string)], autograph=False)\n", - "def parse_session(serialized_sessions):\n", - " \"\"\"A TF function parsing a batch of serialized Session protos into tensors.\n", - "\n", - " It is a TF graph that takes one 1-D tensor as input, and outputs a\n", - " Dict[str, tf.SparseTensor]\n", - " \"\"\"\n", - " query = s2t.create_expression_from_proto(\n", - " serialized_sessions, test_pb2.Session.DESCRIPTOR)\n", - " # Move all the fields of our interest to under \"event\". \n", - " query = query.promote_and_broadcast({\n", - " \"session_feature\": \"session_info.session_feature\",\n", - " \"action_number_of_views\": \"event.action.number_of_views\" },\n", - " \"event\")\n", - " # Specify \"event\" to be examples.\n", - " query = query.reroot(\"event\")\n", - " # Extract all the fields of our interest.\n", - " projection = query.project([\"session_feature\", \"query\", \"action_number_of_views\"]) \n", - " prensors = s2t.calculate_prensors([projection])\n", - " \n", - " output_sparse_tensors = {}\n", - " for prensor in prensors:\n", - " path_to_tensor = prensor.get_sparse_tensors()\n", - " output_sparse_tensors.update({str(k): v for k, v in path_to_tensor.items()})\n", - " \n", - " return output_sparse_tensors\n", - "\n", - "print(\"Defined the workhorse func: (structured data at rest) -\u003e (tensors)\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VKp4SxTgPzpe" - }, - "source": [ - "## Lets see it in action:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "height": 119 - }, - "executionInfo": { - "elapsed": 569, - "status": "ok", - "timestamp": 1600375616071, - "user": { - "displayName": "", - "photoUrl": "", - "userId": "" - }, - "user_tz": 420 - }, - "id": "-cIlFypdPeZX", - "outputId": "7840d783-249c-4be2-d675-10f8e817dded" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'action_number_of_views': SparseTensor(values=[1, 2, 3], dense_shape=[2, 2], \n", - " indices=[[0, 0], [1, 0], [1, 1]]),\n", - " 'query': SparseTensor(values=[b'Hello', b'world'], dense_shape=[2], \n", - " indices=[[0], [1]]),\n", - " 'session_feature': SparseTensor(values=[b'foo', b'foo'], dense_shape=[2, 1], \n", - " indices=[[0, 0], [1, 0]])}\n" - ] - } - ], - "source": [ - "serialized_sessions = tf.constant([\n", - " text_format.Merge(\n", - " \"\"\"\n", - " session_info {\n", - " session_duration_sec: 1.0\n", - " session_feature: \"foo\"\n", - " }\n", - " event {\n", - " query: \"Hello\"\n", - " action {\n", - " number_of_views: 1\n", - " }\n", - " action {\n", - " }\n", - " }\n", - " event {\n", - " 
query: \"world\"\n", - " action {\n", - " number_of_views: 2\n", - " }\n", - " action {\n", - " number_of_views: 3\n", - " }\n", - " }\n", - " \"\"\",\n", - " test_pb2.Session()\n", - " ).SerializeToString()\n", - "])\n", - "\n", - "_pretty_print(parse_session(serialized_sessions))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pW9zBjNMWMIH" - }, - "source": [ - "See how we went from our pre-pipeline data (the Protobuffer) all the way to the structured data, packed into `SparseTensor`s?" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QY7wy_6loF4v" - }, - "source": [ - "# Digging Far Deeper\n", - "Interested and want to learn more? Read on...\n", - "\n", - "Let's define several terms we mentioned before:\n", - "\n", - "### Prensor\n", - "\n", - "A Prensor (protobuffer + tensor) is a data structure storing the data we work on. We use protobuffers a lot at Google. `struct2tensor` can support other structured formats, too.\n", - "\n", - "For example, throughout this colab we will be using proto\n", - "[`struct2tensor.test.Session`](http://cs/symbol:struct2tensor.test.Session). A schematic visualization\n", - "of a selected part of the prensor from that proto looks like:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 543 - }, - "id": "ZTjNwx4bBXFk", - "outputId": "4927fdf8-0d2c-46e6-ee2f-a1b2bf4a298f" - }, - "outputs": [ - { - "data": { - "image/svg+xml": "\u003c?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?\u003e\n\u003c!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\"\u003e\n\u003c!-- Generated by graphviz version 2.40.1 (20161225.0304)\n --\u003e\n\u003c!-- Title: %3 Pages: 1 --\u003e\n\u003csvg width=\"288pt\" height=\"392pt\"\n viewBox=\"0.00 0.00 288.04 392.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\"\u003e\n\u003cg id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 388)\"\u003e\n\u003ctitle\u003e%3\u003c/title\u003e\n\u003cpolygon fill=\"#ffffff\" stroke=\"transparent\" points=\"-4,4 -4,-388 284.0386,-388 284.0386,4 -4,4\"/\u003e\n\u003c!-- root --\u003e\n\u003cg id=\"node1\" class=\"node\"\u003e\n\u003ctitle\u003eroot\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"181.643\" cy=\"-366\" rx=\"27\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"181.643\" y=\"-362.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003eroot\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- session --\u003e\n\u003cg id=\"node2\" class=\"node\"\u003e\n\u003ctitle\u003esession\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"181.643\" cy=\"-279\" rx=\"37.0935\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"181.643\" y=\"-275.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003esession\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- root\u0026#45;\u0026gt;session --\u003e\n\u003cg id=\"edge1\" class=\"edge\"\u003e\n\u003ctitle\u003eroot\u0026#45;\u0026gt;session\u003c/title\u003e\n\u003cpath fill=\"none\" stroke=\"#000000\" d=\"M181.643,-347.9735C181.643,-336.1918 181.643,-320.5607 181.643,-307.1581\"/\u003e\n\u003cpolygon fill=\"#000000\" stroke=\"#000000\" points=\"185.1431,-307.0033 181.643,-297.0034 178.1431,-307.0034 185.1431,-307.0033\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"185.643\" y=\"-318.8\" font-family=\"Times,serif\" font-size=\"14.00\" 
fill=\"#000000\"\u003e*\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- event --\u003e\n\u003cg id=\"node3\" class=\"node\"\u003e\n\u003ctitle\u003eevent\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"132.643\" cy=\"-192\" rx=\"30.5947\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"132.643\" y=\"-188.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003eevent\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- session\u0026#45;\u0026gt;event --\u003e\n\u003cg id=\"edge2\" class=\"edge\"\u003e\n\u003ctitle\u003esession\u0026#45;\u0026gt;event\u003c/title\u003e\n\u003cpath fill=\"none\" stroke=\"#000000\" d=\"M171.7271,-261.3943C164.7744,-249.0496 155.358,-232.3306 147.51,-218.3965\"/\u003e\n\u003cpolygon fill=\"#000000\" stroke=\"#000000\" points=\"150.3339,-216.2782 142.3769,-209.2827 144.2348,-219.7133 150.3339,-216.2782\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"164.643\" y=\"-231.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003e*\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- session_id --\u003e\n\u003cg id=\"node4\" class=\"node\"\u003e\n\u003ctitle\u003esession_id\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"230.643\" cy=\"-192\" rx=\"49.2915\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"230.643\" y=\"-188.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003esession_id\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- session\u0026#45;\u0026gt;session_id --\u003e\n\u003cg id=\"edge3\" class=\"edge\"\u003e\n\u003ctitle\u003esession\u0026#45;\u0026gt;session_id\u003c/title\u003e\n\u003cpath fill=\"none\" stroke=\"#000000\" d=\"M191.5589,-261.3943C198.4208,-249.211 207.6823,-232.7669 215.4675,-218.9443\"/\u003e\n\u003cpolygon fill=\"#000000\" stroke=\"#000000\" points=\"218.7159,-220.3089 220.5737,-209.8782 212.6167,-216.8737 218.7159,-220.3089\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"213.143\" y=\"-231.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003e?\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- action --\u003e\n\u003cg id=\"node5\" class=\"node\"\u003e\n\u003ctitle\u003eaction\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"78.643\" cy=\"-105\" rx=\"33.2948\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"78.643\" y=\"-101.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003eaction\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- event\u0026#45;\u0026gt;action --\u003e\n\u003cg id=\"edge4\" class=\"edge\"\u003e\n\u003ctitle\u003eevent\u0026#45;\u0026gt;action\u003c/title\u003e\n\u003cpath fill=\"none\" stroke=\"#000000\" d=\"M121.9744,-174.8116C114.2085,-162.3 103.5554,-145.1366 94.7556,-130.9592\"/\u003e\n\u003cpolygon fill=\"#000000\" stroke=\"#000000\" points=\"97.5369,-128.8033 89.2895,-122.1527 91.5894,-132.4949 97.5369,-128.8033\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"113.643\" y=\"-144.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003e*\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- query_token --\u003e\n\u003cg id=\"node6\" class=\"node\"\u003e\n\u003ctitle\u003equery_token\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"187.643\" cy=\"-105\" rx=\"57.6901\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"187.643\" y=\"-101.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003equery_token\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- event\u0026#45;\u0026gt;query_token --\u003e\n\u003cg id=\"edge5\" 
class=\"edge\"\u003e\n\u003ctitle\u003eevent\u0026#45;\u0026gt;query_token\u003c/title\u003e\n\u003cpath fill=\"none\" stroke=\"#000000\" d=\"M143.5093,-174.8116C151.3162,-162.4624 161.988,-145.5816 170.8819,-131.513\"/\u003e\n\u003cpolygon fill=\"#000000\" stroke=\"#000000\" points=\"174.0365,-133.0729 176.4218,-122.75 168.1197,-129.3323 174.0365,-133.0729\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"167.643\" y=\"-144.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003e*\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- number_of_views --\u003e\n\u003cg id=\"node7\" class=\"node\"\u003e\n\u003ctitle\u003enumber_of_views\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"78.643\" cy=\"-18\" rx=\"78.7863\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"78.643\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003enumber_of_views\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- action\u0026#45;\u0026gt;number_of_views --\u003e\n\u003cg id=\"edge6\" class=\"edge\"\u003e\n\u003ctitle\u003eaction\u0026#45;\u0026gt;number_of_views\u003c/title\u003e\n\u003cpath fill=\"none\" stroke=\"#000000\" d=\"M78.643,-86.9735C78.643,-75.1918 78.643,-59.5607 78.643,-46.1581\"/\u003e\n\u003cpolygon fill=\"#000000\" stroke=\"#000000\" points=\"82.1431,-46.0033 78.643,-36.0034 75.1431,-46.0034 82.1431,-46.0033\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"82.143\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003e?\u003c/text\u003e\n\u003c/g\u003e\n\u003c/g\u003e\n\u003c/svg\u003e\n", - "text/plain": [ - "\u003cgraphviz.files.Source at 0x7efc6d4c1160\u003e" - ] - }, - "execution_count": 5, - "metadata": { - "tags": [] - }, - "output_type": "execute_result" - } - ], - "source": [ - "#@title { display-mode: \"form\" }\n", - "#@test {\"skip\": true} \n", - "\n", - "_display(\"\"\"\n", - "digraph {\n", - " root -\u003e session [label=\"*\"];\n", - " session -\u003e event [label=\"*\"];\n", - " session -\u003e session_id [label=\"?\"];\n", - " event -\u003e action [label=\"*\"];\n", - " event -\u003e query_token [label=\"*\"]\n", - " action -\u003e number_of_views [label=\"?\"];\n", - "}\n", - "\"\"\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "P0aBK7NbO0wp" - }, - "source": [ - "We will be using visualizations like this to demostrate struct2tensor queries later.\n", - "\n", - "Note:\n", - "\n", - "* The \"*\" on the edge means the pointed node has repeated values; while the \"?\" means it has an optional value.\n", - "* There is always a \"root\" node whose only child is the root of the structure. 
Note that it's \"repeated\" because one struct2tensorTree can represent multiple instances of a structure.\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5stZYn1dBXdl" - }, - "source": [ - "\n", - "### struct2tensor Query\n", - "A struct2tensor query transforms a Prensor into another Prensor.\n", - "\n", - "For example, `broadcast` is a query that replicates a node as a child of one of its siblings.\n", - "\n", - "Applying\n", - "```\n", - "broadcast(\n", - " source_path=\"session.session_id\",\n", - " sibling=\"event\",\n", - " new_field_name=\"session_session_id\")\n", - "```\n", - "\n", - "on the previous tree gives:\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 543 - }, - "id": "zfhF-frzPxZm", - "outputId": "36bd1e53-befe-4bfa-e7da-737bd18a2611" - }, - "outputs": [ - { - "data": { - "image/svg+xml": "\u003c?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?\u003e\n\u003c!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\"\u003e\n\u003c!-- Generated by graphviz version 2.40.1 (20161225.0304)\n --\u003e\n\u003c!-- Title: %3 Pages: 1 --\u003e\n\u003csvg width=\"387pt\" height=\"392pt\"\n viewBox=\"0.00 0.00 387.44 392.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\"\u003e\n\u003cg id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 388)\"\u003e\n\u003ctitle\u003e%3\u003c/title\u003e\n\u003cpolygon fill=\"#ffffff\" stroke=\"transparent\" points=\"-4,4 -4,-388 383.4377,-388 383.4377,4 -4,4\"/\u003e\n\u003c!-- session_session_id --\u003e\n\u003cg id=\"node1\" class=\"node\"\u003e\n\u003ctitle\u003esession_session_id\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#ff0000\" cx=\"80.5928\" cy=\"-105\" rx=\"80.6858\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"80.5928\" y=\"-101.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003esession_session_id\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- root --\u003e\n\u003cg id=\"node2\" class=\"node\"\u003e\n\u003ctitle\u003eroot\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"261.5928\" cy=\"-366\" rx=\"27\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"261.5928\" y=\"-362.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003eroot\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- session --\u003e\n\u003cg id=\"node3\" class=\"node\"\u003e\n\u003ctitle\u003esession\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"261.5928\" cy=\"-279\" rx=\"37.0935\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"261.5928\" y=\"-275.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003esession\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- root\u0026#45;\u0026gt;session --\u003e\n\u003cg id=\"edge1\" class=\"edge\"\u003e\n\u003ctitle\u003eroot\u0026#45;\u0026gt;session\u003c/title\u003e\n\u003cpath fill=\"none\" stroke=\"#000000\" d=\"M261.5928,-347.9735C261.5928,-336.1918 261.5928,-320.5607 261.5928,-307.1581\"/\u003e\n\u003cpolygon fill=\"#000000\" stroke=\"#000000\" points=\"265.0929,-307.0033 261.5928,-297.0034 258.0929,-307.0034 265.0929,-307.0033\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"265.5928\" y=\"-318.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003e*\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- event --\u003e\n\u003cg id=\"node4\" 
class=\"node\"\u003e\n\u003ctitle\u003eevent\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"212.5928\" cy=\"-192\" rx=\"30.5947\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"212.5928\" y=\"-188.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003eevent\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- session\u0026#45;\u0026gt;event --\u003e\n\u003cg id=\"edge2\" class=\"edge\"\u003e\n\u003ctitle\u003esession\u0026#45;\u0026gt;event\u003c/title\u003e\n\u003cpath fill=\"none\" stroke=\"#000000\" d=\"M251.677,-261.3943C244.7242,-249.0496 235.3078,-232.3306 227.4598,-218.3965\"/\u003e\n\u003cpolygon fill=\"#000000\" stroke=\"#000000\" points=\"230.2838,-216.2782 222.3268,-209.2827 224.1846,-219.7133 230.2838,-216.2782\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"244.5928\" y=\"-231.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003e*\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- session_id --\u003e\n\u003cg id=\"node5\" class=\"node\"\u003e\n\u003ctitle\u003esession_id\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"310.5928\" cy=\"-192\" rx=\"49.2915\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"310.5928\" y=\"-188.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003esession_id\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- session\u0026#45;\u0026gt;session_id --\u003e\n\u003cg id=\"edge3\" class=\"edge\"\u003e\n\u003ctitle\u003esession\u0026#45;\u0026gt;session_id\u003c/title\u003e\n\u003cpath fill=\"none\" stroke=\"#000000\" d=\"M271.5087,-261.3943C278.3706,-249.211 287.6322,-232.7669 295.4173,-218.9443\"/\u003e\n\u003cpolygon fill=\"#000000\" stroke=\"#000000\" points=\"298.6657,-220.3089 300.5235,-209.8782 292.5665,-216.8737 298.6657,-220.3089\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"293.0928\" y=\"-231.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003e?\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- event\u0026#45;\u0026gt;session_session_id --\u003e\n\u003cg id=\"edge5\" class=\"edge\"\u003e\n\u003ctitle\u003eevent\u0026#45;\u0026gt;session_session_id\u003c/title\u003e\n\u003cpath fill=\"none\" stroke=\"#000000\" d=\"M191.9678,-178.4063C171.4317,-164.8711 139.5358,-143.8488 115.1315,-127.7641\"/\u003e\n\u003cpolygon fill=\"#000000\" stroke=\"#000000\" points=\"116.79,-124.6654 106.5143,-122.0846 112.9378,-130.5101 116.79,-124.6654\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"158.0928\" y=\"-144.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003e?\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- action --\u003e\n\u003cg id=\"node6\" class=\"node\"\u003e\n\u003ctitle\u003eaction\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"212.5928\" cy=\"-105\" rx=\"33.2948\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"212.5928\" y=\"-101.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003eaction\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- event\u0026#45;\u0026gt;action --\u003e\n\u003cg id=\"edge4\" class=\"edge\"\u003e\n\u003ctitle\u003eevent\u0026#45;\u0026gt;action\u003c/title\u003e\n\u003cpath fill=\"none\" stroke=\"#000000\" d=\"M212.5928,-173.9735C212.5928,-162.1918 212.5928,-146.5607 212.5928,-133.1581\"/\u003e\n\u003cpolygon fill=\"#000000\" stroke=\"#000000\" points=\"216.0929,-133.0033 212.5928,-123.0034 209.0929,-133.0034 216.0929,-133.0033\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"216.5928\" y=\"-144.8\" font-family=\"Times,serif\" font-size=\"14.00\" 
fill=\"#000000\"\u003e*\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- query_token --\u003e\n\u003cg id=\"node7\" class=\"node\"\u003e\n\u003ctitle\u003equery_token\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"321.5928\" cy=\"-105\" rx=\"57.6901\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"321.5928\" y=\"-101.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003equery_token\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- event\u0026#45;\u0026gt;query_token --\u003e\n\u003cg id=\"edge6\" class=\"edge\"\u003e\n\u003ctitle\u003eevent\u0026#45;\u0026gt;query_token\u003c/title\u003e\n\u003cpath fill=\"none\" stroke=\"#000000\" d=\"M231.0832,-177.2416C247.8774,-163.8371 272.929,-143.8418 292.4215,-128.2836\"/\u003e\n\u003cpolygon fill=\"#000000\" stroke=\"#000000\" points=\"294.7624,-130.8933 300.3947,-121.9196 290.3956,-125.4223 294.7624,-130.8933\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"278.5928\" y=\"-144.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003e*\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- number_of_views --\u003e\n\u003cg id=\"node8\" class=\"node\"\u003e\n\u003ctitle\u003enumber_of_views\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"212.5928\" cy=\"-18\" rx=\"78.7863\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"212.5928\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003enumber_of_views\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- action\u0026#45;\u0026gt;number_of_views --\u003e\n\u003cg id=\"edge7\" class=\"edge\"\u003e\n\u003ctitle\u003eaction\u0026#45;\u0026gt;number_of_views\u003c/title\u003e\n\u003cpath fill=\"none\" stroke=\"#000000\" d=\"M212.5928,-86.9735C212.5928,-75.1918 212.5928,-59.5607 212.5928,-46.1581\"/\u003e\n\u003cpolygon fill=\"#000000\" stroke=\"#000000\" points=\"216.0929,-46.0033 212.5928,-36.0034 209.0929,-46.0034 216.0929,-46.0033\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"216.0928\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003e?\u003c/text\u003e\n\u003c/g\u003e\n\u003c/g\u003e\n\u003c/svg\u003e\n", - "text/plain": [ - "\u003cgraphviz.files.Source at 0x7efc6d4c1320\u003e" - ] - }, - "execution_count": 6, - "metadata": { - "tags": [] - }, - "output_type": "execute_result" - } - ], - "source": [ - "#@title { display-mode: \"form\" }\n", - "#@test {\"skip\": true} \n", - "\n", - "_display(\"\"\"\n", - "digraph {\n", - " session_session_id [color=\"red\"];\n", - " root -\u003e session [label=\"*\"];\n", - " session -\u003e event [label=\"*\"];\n", - " session -\u003e session_id [label=\"?\"];\n", - " event -\u003e action [label=\"*\"];\n", - " event -\u003e session_session_id [label=\"?\"];\n", - " event -\u003e query_token [label=\"*\"];\n", - " action -\u003e number_of_views [label=\"?\"];\n", - "}\n", - "\"\"\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eOHokTjBPyHW" - }, - "source": [ - "We will talk about common struct2tensor queries in later sections.\n", - "\n", - "### Projection\n", - "A projection of paths in a Prensor produces another Prensor with just the selected paths.\n", - "\n", - "#### Logical representation of a projection\n", - "The structure of the projected path can be represented losslessly as nested lists. 
For example, the projection of `event.action.number_of_views` from the struct2tensorTree formed by the following two instances of `struct2tensor.test.Session`:\n", - "```\n", - "{\n", - " event { action { number_of_views: 1} action { number_of_views: 2} action {} }\n", - " event {}\n", - "}, {\n", - " event { action { number_of_views: 3} }\n", - "}\n", - "```\n", - "\n", - "is:\n", - "\n", - "```\n", - "[ # the outer list has two elements b/c there are two Session protos.\n", - " [ # the first proto has two events\n", - " [[1],[2],[]], # 3 actions, the last one does not have a number_of_views.\n", - " [], # the second event does not have action\n", - " ],\n", - " [ # the second proto has one event\n", - " [[3]],\n", - " ],\n", - "]\n", - "```\n", - "\n", - "#### Representing nested lists with `tf.SparseTensor`\n", - "\n", - "struct2tensor uses `tf.SparseTensor` to represent the above nested list in the projection results. Note that `tf.SparseTensor` essentially enforces that the lists nested at the same level to have the same length (because the there is a certain size for each dimension), therefore this representation is lossy. The above nested lists, when written as a SparseTensor will look like:\n", - "```\n", - "tf.SparseTensor(\n", - " dense_shape=[2, 2, 3, 1], # each is the maximum length of lists at the same nesting level.\n", - " values = [1, 2, 3],\n", - " indices = [[0, 0, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]]\n", - ")\n", - "```\n", - "\n", - "Note that the last dimension is useless: the index of that dimension will always be 0 for any present value because number_of_views is an optional field. So struct2tensors library will actually \"squeeze\" all the optional dimensions.\n", - "\n", - "The actual result would be:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "height": 68 - }, - "executionInfo": { - "elapsed": 352, - "status": "ok", - "timestamp": 1600368919353, - "user": { - "displayName": "", - "photoUrl": "", - "userId": "" - }, - "user_tz": 420 - }, - "id": "XvuEY3D3WIP7", - "outputId": "ee33d486-90e7-4328-accc-11f13f82a9db" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[{event.action.number_of_views: SparseTensor(values=[1, 2, 3], dense_shape=[2, 2, 3], \n", - " indices=[[0, 0, 0], [0, 0, 1], [1, 0, 0]])}\n", - "]\n" - ] - } - ], - "source": [ - "query = _create_query_from_text_sessions(['''\n", - " event { action { number_of_views: 1} action { number_of_views: 2} action {} }\n", - " event {}\n", - " ''', '''\n", - " event { action { number_of_views: 3} }\n", - " ''']\n", - " ).project([\"event.action.number_of_views\"])\n", - "\n", - "prensor = s2t.calculate_prensors([query])\n", - "pretty.pprint(prensor)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "iaailOJhWLLa" - }, - "source": [ - "struct2tensor's internal data model is closer to the above \"nested lists\" abstraction and sometimes it's easier to reason with \"nested lists\" than with `SparseTensor`s.\n", - "\n", - "Recently, [`tf.RaggedTensor`](https://www.tensorflow.org/guide/ragged_tensors) was introduced to represent nested lists exactly. We are working on adding support for projecting into ragged tensors." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UD48ikK4Eop-" - }, - "source": [ - "## Common struct2tensor Queries\n", - "\n", - "### `promote`\n", - "\n", - "Promotes a node to become a sibling of its parent. 
If the node is repeated, then all its values are concatenated (the order is preserved)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 426 - }, - "id": "tnbD2lKoDbsk", - "outputId": "244aeef6-48b7-4ae5-f01c-2429bb65a7ea" - }, - "outputs": [ - { - "data": { - "image/svg+xml": "\u003c?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?\u003e\n\u003c!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\"\u003e\n\u003c!-- Generated by graphviz version 2.40.1 (20161225.0304)\n --\u003e\n\u003c!-- Title: %3 Pages: 1 --\u003e\n\u003csvg width=\"124pt\" height=\"305pt\"\n viewBox=\"0.00 0.00 123.69 305.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\"\u003e\n\u003cg id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 301)\"\u003e\n\u003ctitle\u003e%3\u003c/title\u003e\n\u003cpolygon fill=\"#ffffff\" stroke=\"transparent\" points=\"-4,4 -4,-301 119.6897,-301 119.6897,4 -4,4\"/\u003e\n\u003c!-- root --\u003e\n\u003cg id=\"node1\" class=\"node\"\u003e\n\u003ctitle\u003eroot\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"57.8449\" cy=\"-279\" rx=\"27\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"57.8449\" y=\"-275.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003eroot\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- session --\u003e\n\u003cg id=\"node2\" class=\"node\"\u003e\n\u003ctitle\u003esession\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"57.8449\" cy=\"-192\" rx=\"37.0935\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"57.8449\" y=\"-188.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003esession\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- root\u0026#45;\u0026gt;session --\u003e\n\u003cg id=\"edge1\" class=\"edge\"\u003e\n\u003ctitle\u003eroot\u0026#45;\u0026gt;session\u003c/title\u003e\n\u003cpath fill=\"none\" stroke=\"#000000\" d=\"M57.8449,-260.9735C57.8449,-249.1918 57.8449,-233.5607 57.8449,-220.1581\"/\u003e\n\u003cpolygon fill=\"#000000\" stroke=\"#000000\" points=\"61.345,-220.0033 57.8449,-210.0034 54.345,-220.0034 61.345,-220.0033\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"61.8449\" y=\"-231.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003e*\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- event --\u003e\n\u003cg id=\"node3\" class=\"node\"\u003e\n\u003ctitle\u003eevent\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"57.8449\" cy=\"-105\" rx=\"30.5947\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"57.8449\" y=\"-101.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003eevent\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- session\u0026#45;\u0026gt;event --\u003e\n\u003cg id=\"edge2\" class=\"edge\"\u003e\n\u003ctitle\u003esession\u0026#45;\u0026gt;event\u003c/title\u003e\n\u003cpath fill=\"none\" stroke=\"#000000\" d=\"M57.8449,-173.9735C57.8449,-162.1918 57.8449,-146.5607 57.8449,-133.1581\"/\u003e\n\u003cpolygon fill=\"#000000\" stroke=\"#000000\" points=\"61.345,-133.0033 57.8449,-123.0034 54.345,-133.0034 61.345,-133.0033\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"61.8449\" y=\"-144.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003e*\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- query_token --\u003e\n\u003cg id=\"node4\" 
class=\"node\"\u003e\n\u003ctitle\u003equery_token\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"57.8449\" cy=\"-18\" rx=\"57.6901\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"57.8449\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003equery_token\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- event\u0026#45;\u0026gt;query_token --\u003e\n\u003cg id=\"edge3\" class=\"edge\"\u003e\n\u003ctitle\u003eevent\u0026#45;\u0026gt;query_token\u003c/title\u003e\n\u003cpath fill=\"none\" stroke=\"#000000\" d=\"M57.8449,-86.9735C57.8449,-75.1918 57.8449,-59.5607 57.8449,-46.1581\"/\u003e\n\u003cpolygon fill=\"#000000\" stroke=\"#000000\" points=\"61.345,-46.0033 57.8449,-36.0034 54.345,-46.0034 61.345,-46.0033\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"61.8449\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003e*\u003c/text\u003e\n\u003c/g\u003e\n\u003c/g\u003e\n\u003c/svg\u003e\n", - "text/plain": [ - "\u003cgraphviz.files.Source at 0x7efc6d4c1438\u003e" - ] - }, - "execution_count": 8, - "metadata": { - "tags": [] - }, - "output_type": "execute_result" - } - ], - "source": [ - "#@title { display-mode: \"form\" }\n", - "#@test {\"skip\": true} \n", - "\n", - "_display('''\n", - "digraph {\n", - " root -\u003e session [label=\"*\"];\n", - " session -\u003e event [label=\"*\"];\n", - " event -\u003e query_token [label=\"*\"];\n", - "}\n", - "''')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "OOgJlcHPDyIk" - }, - "source": [ - "`promote(source_path=\"event.query_token\", new_field_name=\"event_query_token\")`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 426 - }, - "id": "xKJ_EraVHyUA", - "outputId": "64c3720d-3672-4625-d8c1-1a4b301d6a2d" - }, - "outputs": [ - { - "data": { - "image/svg+xml": "\u003c?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?\u003e\n\u003c!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\"\u003e\n\u003c!-- Generated by graphviz version 2.40.1 (20161225.0304)\n --\u003e\n\u003c!-- Title: %3 Pages: 1 --\u003e\n\u003csvg width=\"281pt\" height=\"305pt\"\n viewBox=\"0.00 0.00 281.04 305.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\"\u003e\n\u003cg id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 301)\"\u003e\n\u003ctitle\u003e%3\u003c/title\u003e\n\u003cpolygon fill=\"#ffffff\" stroke=\"transparent\" points=\"-4,4 -4,-301 277.0375,-301 277.0375,4 -4,4\"/\u003e\n\u003c!-- event_query_token --\u003e\n\u003cg id=\"node1\" class=\"node\"\u003e\n\u003ctitle\u003eevent_query_token\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#ff0000\" cx=\"83.1926\" cy=\"-105\" rx=\"83.3857\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"83.1926\" y=\"-101.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003eevent_query_token\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- root --\u003e\n\u003cg id=\"node2\" class=\"node\"\u003e\n\u003ctitle\u003eroot\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"98.1926\" cy=\"-279\" rx=\"27\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"98.1926\" y=\"-275.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003eroot\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- session --\u003e\n\u003cg id=\"node3\" 
class=\"node\"\u003e\n\u003ctitle\u003esession\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"98.1926\" cy=\"-192\" rx=\"37.0935\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"98.1926\" y=\"-188.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003esession\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- root\u0026#45;\u0026gt;session --\u003e\n\u003cg id=\"edge1\" class=\"edge\"\u003e\n\u003ctitle\u003eroot\u0026#45;\u0026gt;session\u003c/title\u003e\n\u003cpath fill=\"none\" stroke=\"#000000\" d=\"M98.1926,-260.9735C98.1926,-249.1918 98.1926,-233.5607 98.1926,-220.1581\"/\u003e\n\u003cpolygon fill=\"#000000\" stroke=\"#000000\" points=\"101.6927,-220.0033 98.1926,-210.0034 94.6927,-220.0034 101.6927,-220.0033\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"102.1926\" y=\"-231.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003e*\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- session\u0026#45;\u0026gt;event_query_token --\u003e\n\u003cg id=\"edge3\" class=\"edge\"\u003e\n\u003ctitle\u003esession\u0026#45;\u0026gt;event_query_token\u003c/title\u003e\n\u003cpath fill=\"none\" stroke=\"#000000\" d=\"M95.0846,-173.9735C93.0533,-162.1918 90.3582,-146.5607 88.0475,-133.1581\"/\u003e\n\u003cpolygon fill=\"#000000\" stroke=\"#000000\" points=\"91.4449,-132.2633 86.2966,-123.0034 84.5467,-133.4527 91.4449,-132.2633\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"96.1926\" y=\"-144.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003e*\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- event --\u003e\n\u003cg id=\"node4\" class=\"node\"\u003e\n\u003ctitle\u003eevent\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"215.1926\" cy=\"-105\" rx=\"30.5947\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"215.1926\" y=\"-101.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003eevent\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- session\u0026#45;\u0026gt;event --\u003e\n\u003cg id=\"edge2\" class=\"edge\"\u003e\n\u003ctitle\u003esession\u0026#45;\u0026gt;event\u003c/title\u003e\n\u003cpath fill=\"none\" stroke=\"#000000\" d=\"M118.5725,-176.8458C137.7919,-162.5544 166.7343,-141.0331 187.9324,-125.2705\"/\u003e\n\u003cpolygon fill=\"#000000\" stroke=\"#000000\" points=\"190.2713,-127.8928 196.2075,-119.1171 186.0944,-122.2756 190.2713,-127.8928\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"168.1926\" y=\"-144.8\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003e*\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- query_token --\u003e\n\u003cg id=\"node5\" class=\"node\"\u003e\n\u003ctitle\u003equery_token\u003c/title\u003e\n\u003cellipse fill=\"none\" stroke=\"#000000\" cx=\"215.1926\" cy=\"-18\" rx=\"57.6901\" ry=\"18\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"215.1926\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\" fill=\"#000000\"\u003equery_token\u003c/text\u003e\n\u003c/g\u003e\n\u003c!-- event\u0026#45;\u0026gt;query_token --\u003e\n\u003cg id=\"edge4\" class=\"edge\"\u003e\n\u003ctitle\u003eevent\u0026#45;\u0026gt;query_token\u003c/title\u003e\n\u003cpath fill=\"none\" stroke=\"#000000\" d=\"M215.1926,-86.9735C215.1926,-75.1918 215.1926,-59.5607 215.1926,-46.1581\"/\u003e\n\u003cpolygon fill=\"#000000\" stroke=\"#000000\" points=\"218.6927,-46.0033 215.1926,-36.0034 211.6927,-46.0034 218.6927,-46.0033\"/\u003e\n\u003ctext text-anchor=\"middle\" x=\"219.1926\" y=\"-57.8\" font-family=\"Times,serif\" font-size=\"14.00\" 
fill=\"#000000\"\u003e*\u003c/text\u003e\n\u003c/g\u003e\n\u003c/g\u003e\n\u003c/svg\u003e\n", - "text/plain": [ - "\u003cgraphviz.files.Source at 0x7efc58c02358\u003e" - ] - }, - "execution_count": 9, - "metadata": { - "tags": [] - }, - "output_type": "execute_result" - } - ], - "source": [ - "#@title { display-mode: \"form\" }\n", - "#@test {\"skip\": true} \n", - "\n", - "_display('''\n", - "digraph {\n", - " event_query_token [color=\"red\"];\n", - " root -\u003e session [label=\"*\"];\n", - " session -\u003e event [label=\"*\"];\n", - " session -\u003e event_query_token [label=\"*\"];\n", - " event -\u003e query_token [label=\"*\"];\n", - "}\n", - "''')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "height": 68 - }, - "executionInfo": { - "elapsed": 344, - "status": "ok", - "timestamp": 1600296594869, - "user": { - "displayName": "", - "photoUrl": "", - "userId": "" - }, - "user_tz": 420 - }, - "id": "oQCcVWd-JDT9", - "outputId": "acad1b9a-0985-46bb-b895-7d085814d20e" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[{event_query_token: SparseTensor(values=[b'abc', b'def', b'ghi'], dense_shape=[1, 3], \n", - " indices=[[0, 0], [0, 1], [0, 2]])}\n", - "]\n" - ] - } - ], - "source": [ - "query = (_create_query_from_text_sessions([\n", - "\"\"\"\n", - "event {\n", - " query_token: \"abc\"\n", - " query_token: \"def\"\n", - "}\n", - "event {\n", - " query_token: \"ghi\"\n", - "}\n", - "\"\"\"])\n", - " .promote(source_path=\"event.query_token\", new_field_name=\"event_query_token\")\n", - " .project([\"event_query_token\"]))\n", - "\n", - "prensor = s2t.calculate_prensors([query])\n", - "\n", - "_pretty_print(prensor)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "av9km5d_KM8o" - }, - "source": [ - "The projected structure is like:\n", - "```\n", - "{\n", - " # this is under Session.\n", - " event_query_token: \"abc\"\n", - " event_query_token: \"def\"\n", - " event_query_token: \"ghi\"\n", - "}\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "t2BU8warKqFm" - }, - "source": [ - "### `broadcast`\n", - "\n", - "Broadcasts the value of a node to one of its sibling. The value will be replicated if the sibling is repeated. 
This is similar to TensorFlow and NumPy's [broadcasting semantics](https://docs.scipy.org/doc/numpy-1.13.0/user/basics.broadcasting.html).\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 311
-    },
-    "id": "1hFGEV0DMmOo",
-    "outputId": "ff23c5ce-17c9-4604-f309-c9b384b6d8ca"
-   },
-   "outputs": [
-    {
-     "data": {
-      "image/svg+xml": "[graphviz SVG omitted: it renders the digraph shown in this cell's source]",
-      "text/plain": [
-       "\u003cgraphviz.files.Source at 0x7efc58d8cc50\u003e"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {
-      "tags": []
-     },
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "#@title { display-mode: \"form\" }\n",
-    "#@test {\"skip\": true} \n",
-    "\n",
-    "_display('''\n",
-    "digraph {\n",
-    "  root -\u003e session [label=\"*\"];\n",
-    "  session -\u003e session_id [label=\"?\"];\n",
-    "  session -\u003e event [label=\"*\"];\n",
-    "}\n",
-    "''')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "DvBCsWBuNDKV"
-   },
-   "source": [
-    "`broadcast(source_path=\"session_id\", sibling_field=\"event\", new_field_name=\"session_session_id\")`"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 426
-    },
-    "id": "wTUHyGvSNMGp",
-    "outputId": "d1ef4902-72eb-4bd2-e0bb-3008a9de17e7"
-   },
-   "outputs": [
-    {
-     "data": {
-      "image/svg+xml": "[graphviz SVG omitted: it renders the digraph shown in this cell's source]",
-      "text/plain": [
-       "\u003cgraphviz.files.Source at 0x7efc58c25550\u003e"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {
-      "tags": []
-     },
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "#@title { display-mode: \"form\" }\n",
-    "#@test {\"skip\": true} \n",
-    "\n",
-    "_display('''\n",
-    "digraph {\n",
-    "  session_session_id [color=\"red\"];\n",
-    "  root -\u003e session [label=\"*\"];\n",
-    "  session -\u003e session_id [label=\"?\"];\n",
-    "  session -\u003e event [label=\"*\"];\n",
-    "  event -\u003e session_session_id [label=\"?\"];\n",
-    "}\n",
-    "''')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "height": 68
-    },
-    "executionInfo": {
-     "elapsed": 341,
-     "status": "ok",
-     "timestamp": 1600296607633,
-     "user": {
-      "displayName": "",
-      "photoUrl": "",
-      "userId": ""
-     },
-     "user_tz": 420
-    },
-    "id": "wQE_ceMzNjzv",
-    "outputId": "d92d9977-1e4c-4138-e89c-94e9c0f4bf5e"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[{event.session_session_id: SparseTensor(values=[8, 8], dense_shape=[1, 2], \n",
-      " indices=[[0, 0], [0, 1]])}\n",
-      "]\n"
-     ]
-    }
-   ],
-   "source": [
-    "query = (_create_query_from_text_sessions([\n",
-    "\"\"\"\n",
-    "session_id: 8\n",
-    "event { }\n",
-    "event { }\n",
-    "\"\"\"])\n",
-    "         .broadcast(source_path=\"session_id\",\n",
-    "                    sibling_field=\"event\",\n",
-    "                    new_field_name=\"session_session_id\")\n",
-    "         .project([\"event.session_session_id\"]))\n",
-    "\n",
-    "prensor = s2t.calculate_prensors([query])\n",
-    "_pretty_print(prensor)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "7fL8pmsQObUT"
-   },
-   "source": [
-    "The projected structure is like:\n",
-    "```\n",
-    "{\n",
-    "  event {\n",
-    "    session_session_id: 8\n",
-    "  }\n",
-    "  event {\n",
-    "    session_session_id: 8\n",
-    "  }\n",
-    "}\n",
-    "```"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "Ch2WgR9AP23Q"
-   },
-   "source": [
-    "### `promote_and_broadcast`\n",
-    "The query accepts multiple source fields and a destination field. For each source field, it first promotes the source field to the least common ancestor with the destination field (if necessary), then broadcasts it to the destination field (if necessary).\n",
-    "\n",
-    "For machine-learning purposes, this usually gives a reasonable flattened representation of nested structures."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "-uNfasxgRsvO"
-   },
-   "source": [
-    "```\n",
-    "promote_and_broadcast(\n",
-    "    path_dictionary={\n",
-    "        'session_info_duration_sec': 'session_info.session_duration_sec'},\n",
-    "    dest_path_parent='event.action')\n",
-    "```\n",
-    "is equivalent to:\n",
-    "```\n",
-    "promote(source_path='session_info.session_duration_sec',\n",
-    "        new_field_name='anonymous_field1')\n",
-    "\n",
-    "broadcast(source_path='anonymous_field1',\n",
-    "          sibling_field='event.action',\n",
-    "          new_field_name='session_info_duration_sec')\n",
-    "```\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "rYkWc_u8SR_k"
-   },
-   "source": [
-    "### `map_field_values`\n",
-    "\n",
-    "Creates a new node that is a sibling of a leaf node. The values of the new node are the results of applying the given function to the values of the source node.\n",
-    "\n",
-    "Note that the provided function takes a 1-D tensor containing all the values of the source node as input, must output a 1-D tensor of the same size, and must build TF ops.\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "height": 68
-    },
-    "executionInfo": {
-     "elapsed": 333,
-     "status": "ok",
-     "timestamp": 1600296617311,
-     "user": {
-      "displayName": "",
-      "photoUrl": "",
-      "userId": ""
-     },
-     "user_tz": 420
-    },
-    "id": "DWjA2OFcS4k1",
-    "outputId": "7a2d8ef5-7111-4b45-81dd-65d8222badae"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[{session_id_plus_one: SparseTensor(values=[9, 10], dense_shape=[2], \n",
-      " indices=[[0], [1]])}\n",
-      "]\n"
-     ]
-    }
-   ],
-   "source": [
-    "query = (_create_query_from_text_sessions([\n",
-    "\"\"\"\n",
-    "session_id: 8\n",
-    "\"\"\",\n",
-    "\"\"\"\n",
-    "session_id: 9\n",
-    "\"\"\"])\n",
-    "         .map_field_values(\"session_id\", lambda x: tf.add(x, 1), dtype=tf.int64,\n",
-    "                           new_field_name=\"session_id_plus_one\")\n",
-    "         .project([\"session_id_plus_one\"]))\n",
-    "\n",
-    "prensor = s2t.calculate_prensors([query])\n",
-    "\n",
-    "_pretty_print(prensor)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "_NusnDa1T7s5"
-   },
-   "source": [
-    "### `reroot`\n",
-    "\n",
-    "Makes the given node the new root of the struct2tensor tree. This has two effects:\n",
-    "\n",
-    "* restricts the scope of the tree\n",
-    "  + the field paths in all following queries are relative to the new root\n",
-    "  + there is no way to refer to nodes outside the subtree rooted at the new root\n",
-    "* changes the batch dimension.\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 426
-    },
-    "id": "OBNPdOlQU6qS",
-    "outputId": "df56d6d2-72c5-4c90-e2b9-b3fe2bffde00"
-   },
-   "outputs": [
-    {
-     "data": {
-      "image/svg+xml": "[graphviz SVG omitted: it renders the digraph shown in this cell's source]",
-      "text/plain": [
-       "\u003cgraphviz.files.Source at 0x7efc58c9f3c8\u003e"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {
-      "tags": []
-     },
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "#@title { display-mode: \"form\" }\n",
-    "#@test {\"skip\": true} \n",
-    "\n",
-    "_display('''\n",
-    "digraph {\n",
-    "  root -\u003e session [label=\"*\"];\n",
-    "  session -\u003e session_id [label=\"?\"];\n",
-    "  session -\u003e event [label=\"*\"];\n",
-    "  event -\u003e event_id [label=\"?\"];\n",
-    "}\n",
-    "''')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "HFlI3zMtU6Ev"
-   },
-   "source": [
-    "`reroot(\"event\")`"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 311
-    },
-    "id": "uANFWQA-T68W",
-    "outputId": "659a11b1-39bb-456d-de90-b2675182ca25"
-   },
-   "outputs": [
-    {
-     "data": {
-      "image/svg+xml": "[graphviz SVG omitted: it renders the digraph shown in this cell's source]",
-      "text/plain": [
-       "\u003cgraphviz.files.Source at 0x7efc58c9f908\u003e"
-      ]
-     },
-     "execution_count": 16,
-     "metadata": {
-      "tags": []
-     },
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "#@title { display-mode: \"form\" }\n",
-    "#@test {\"skip\": true} \n",
-    "\n",
-    "_display('''\n",
-    "digraph {\n",
-    "  root -\u003e event [label=\"*\"];\n",
-    "  event -\u003e event_id [label=\"?\"];\n",
-    "}\n",
-    "''')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "height": 459
-    },
-    "executionInfo": {
-     "elapsed": 408,
-     "status": "ok",
-     "timestamp": 1600297140484,
-     "user": {
-      "displayName": "",
-      "photoUrl": "",
-      "userId": ""
-     },
-     "user_tz": 420
-    },
-    "id": "lMoeHHllVeet",
-    "outputId": "631afc7a-ebb3-456f-d0a1-11eeeccee95c"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Assume the following Sessions: \n",
-      "[session_id: 1\n",
-      "event {\n",
-      "  event_id: \"a\"\n",
-      "}\n",
-      "event {\n",
-      "  event_id: \"b\"\n",
-      "}\n",
-      ", session_id: 2\n",
-      ", session_id: 3\n",
-      "event {\n",
-      "  event_id: \"c\"\n",
-      "}\n",
-      "]\n",
-      "\n",
-      "\n",
-      "project([\"event.event_id\"]) before reroot() (the batch dimension is the index to sessions):\n",
-      "[{event.event_id: SparseTensor(values=[b'a', b'b', b'c'], dense_shape=[3, 2], \n",
-      " indices=[[0, 0], [0, 1], [2, 0]])}\n",
-      "]\n",
-      "\n",
-      "\n",
-      "project([\"event_id\"]) after reroot() (the batch dimension becomes the index to events):\n",
-      "[{event_id: SparseTensor(values=[b'a', b'b', b'c'], dense_shape=[3], \n",
-      " indices=[[0], [1], [2]])}\n",
-      "]\n"
-     ]
-    }
-   ],
-   "source": [
-    "#@title { display-mode: \"form\" }\n",
-    "text_protos = [\"\"\"\n",
-    "session_id: 1\n",
-    "event {\n",
-    "  event_id: \"a\"\n",
-    "}\n",
-    "event {\n",
-    "  event_id: \"b\"\n",
-    "}\n",
-    "\"\"\",\n",
-    "\"\"\"\n",
-    "session_id: 2\n",
-    "\"\"\",\n",
-    "\"\"\"\n",
-    "session_id: 3\n",
-    "event {\n",
-    "  event_id: \"c\"\n",
-    "}\n",
-    "\"\"\"\n",
-    "]\n",
-    "print(\"\"\"Assume the following Sessions: \"\"\")\n",
-    "print([text_format.Merge(p, s2t.test.test_pb2.Session()) for p in text_protos])\n",
-    "print(\"\\n\")\n",
-    "reroot_example_query = _create_query_from_text_sessions(text_protos)\n",
-    "\n",
-    "print(\"\"\"project([\"event.event_id\"]) before reroot() (the batch dimension is the index to sessions):\"\"\")\n",
-    "_pretty_print(s2t.calculate_prensors([reroot_example_query.project([\"event.event_id\"])]))\n",
-    "print(\"\\n\")\n",
-    "print(\"\"\"project([\"event_id\"]) after reroot() (the batch dimension becomes the index to events):\"\"\")\n",
-    "_pretty_print(s2t.calculate_prensors([reroot_example_query.reroot(\"event\").project([\"event_id\"])]))"
-   ]
-  },
"You can specify a key for the proto map field in a path via brackets.\n", - "\n", - "Given the following tf.Example:\n", - "```\n", - "features {\n", - " feature {\n", - " key: \"my_feature\"\n", - " value {\n", - " float_list {\n", - " value: 1.0\n", - " }\n", - " }\n", - " }\n", - " feature {\n", - " key: \"other_feature\"\n", - " value {\n", - " bytes_list {\n", - " value: \"my_val\"\n", - " }\n", - " }\n", - " }\n", - "}\n", - "```\n", - "\n", - "To get the values of `my_feature` and `other_feature`, we can `promote_and_broadcast` and `project` the following paths: `features.feature[my_feature].float_list.value` and `features.feature[other_feature].bytes_list.value`\n", - "\n", - "This results in the following dict of ragged tensors: \n", - "```\n", - "{\n", - " features.my_new_feature: \u003ctf.RaggedTensor [[[1.0]]]\u003e,\n", - " features.other_new_feature: \u003ctf.RaggedTensor [[[b'my_val']]]\u003e\n", - "}\n", - "```\n", - "\n", - "Note: we renamed `my_feature` to `my_new_feature` in the `promote_and_broadcast` (and similarly for `other_feature`)." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "executionInfo": { - "elapsed": 111, - "status": "ok", - "timestamp": 1632331356173, - "user": { - "displayName": "", - "photoUrl": "", - "userId": "" - }, - "user_tz": 420 - }, - "id": "9ESrpLsEL4SO", - "outputId": "ba002544-d4b2-4ae2-f343-f1f25ab91a4d" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{features.my_new_feature: \u003ctf.RaggedTensor [[[1.0]]]\u003e, features.other_new_feature: \u003ctf.RaggedTensor [[[b'my_val']]]\u003e}\n" - ] - } - ], - "source": [ - "tf_example = text_format.Parse(\"\"\"\n", - "features {\n", - " feature {\n", - " key: \"my_feature\"\n", - " value {\n", - " float_list {\n", - " value: 1.0\n", - " }\n", - " }\n", - " }\n", - " feature {\n", - " key: \"other_feature\"\n", - " value {\n", - " bytes_list {\n", - " value: \"my_val\"\n", - " }\n", - " }\n", - " }\n", - "}\n", - "\"\"\", tf.train.Example())\n", - "\n", - "query = s2t.create_expression_from_proto(\n", - " tf_example.SerializeToString(), tf.train.Example.DESCRIPTOR)\n", - "query = query.promote_and_broadcast({'my_new_feature': \"features.feature[my_feature].float_list.value\", \"other_new_feature\": \"features.feature[other_feature].bytes_list.value\"}, \"features\")\n", - "query = query.project([\"features.my_new_feature\", \"features.other_new_feature\"])\n", - "[prensor] = s2t.calculate_prensors([query])\n", - "ragged_tensors = prensor.get_ragged_tensors()\n", - "\n", - "print(ragged_tensors)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UxLZtkdmM7lj" - }, - "source": [ - "## Apache Parquet Support\n", - "\n", - "`struct2tensor` offers an [Apache Parquet](https://parquet.apache.org/) [tf.DataSet](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) that allows reading from a Parquet file and apply queries to manipulate the structure of the data.\n", - "\n", - "Because of the powerful struct2tensor library, the dataset will only read the Parquet columns that are required. This reduces I/O cost if we only need a select few columns." 
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "UxLZtkdmM7lj"
-   },
-   "source": [
-    "## Apache Parquet Support\n",
-    "\n",
-    "`struct2tensor` offers an [Apache Parquet](https://parquet.apache.org/) [tf.data.Dataset](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) that allows reading from a Parquet file and applying queries to manipulate the structure of the data.\n",
-    "\n",
-    "Because struct2tensor knows which columns a query requires, the dataset reads only those Parquet columns. This reduces I/O cost when only a few columns are needed.\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "7IHo8w3HMuyT"
-   },
-   "source": [
-    "### Preparation\n",
-    "\n",
-    "Please run the code cell at [Some Pretty Printing and Imports](#scrollTo=dIxHSM3VQfUu\u0026line=1\u0026uniqifier=1) to ensure that all required modules are imported and that pretty printing works properly.\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "Vu02cwvfXVpf"
-   },
-   "source": [
-    "#### Prepare the input data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 68
-    },
-    "id": "xEDieagzNL-9",
-    "outputId": "773de200-cd49-496f-ff3f-60074be06bca"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
-      "                                 Dload  Upload   Total   Spent    Left  Speed\n",
-      "100  1657  100  1657    0     0   8122      0 --:--:-- --:--:-- --:--:--  8122\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Download our sample data file from the struct2tensor repository. The description of the data is below.\n",
-    "#@test {\"skip\": true} \n",
-    "\n",
-    "!curl -o dremel_example.parquet 'https://raw.githubusercontent.com/google/struct2tensor/master/struct2tensor/testdata/parquet_testdata/dremel_example.parquet'"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "Gn5J62JkNN-P"
-   },
-   "source": [
-    "### Example\n",
-    "\n",
-    "We will use a sample Parquet data file (*dremel_example.parquet*), which contains data based on the example used in the Dremel paper: https://storage.googleapis.com/pub-tools-public-publication-data/pdf/36632.pdf\n",
-    "\n",
-    "The file *dremel_example.parquet* has the following schema:\n",
-    "\n",
-    "```\n",
-    "message Document {\n",
-    "  required int64 DocId;\n",
-    "  optional group Links {\n",
-    "    repeated int64 Backward;\n",
-    "    repeated int64 Forward; }\n",
-    "  repeated group Name {\n",
-    "    repeated group Language {\n",
-    "      required string Code;\n",
-    "      optional string Country; }\n",
-    "    optional string Url; }}\n",
-    "```\n",
-    "\n",
-    "and contains the following data:\n",
-    "\n",
-    "```\n",
-    "Document\n",
-    "  DocId: 10\n",
-    "  Links\n",
-    "    Forward: 20\n",
-    "    Forward: 40\n",
-    "    Forward: 60\n",
-    "  Name\n",
-    "    Language\n",
-    "      Code: 'en-us'\n",
-    "      Country: 'us'\n",
-    "    Language\n",
-    "      Code: 'en'\n",
-    "    Url: 'http://A'\n",
-    "  Name\n",
-    "    Url: 'http://B'\n",
-    "  Name\n",
-    "    Language\n",
-    "      Code: 'en-gb'\n",
-    "      Country: 'gb'\n",
-    "Document\n",
-    "  DocId: 20\n",
-    "  Links\n",
-    "    Backward: 10\n",
-    "    Backward: 30\n",
-    "    Forward: 80\n",
-    "  Name\n",
-    "    Url: 'http://C'\n",
-    "```\n",
-    "\n",
-    "In this example, we will promote and broadcast the field `Links.Forward` and project it.\n",
-    "\n",
-    "`batch_size` is the number of records (`Document`) per output prensor. Batching works with optional and repeated fields, and always batches whole records.\n",
-    "\n",
-    "Feel free to try `batch_size = 2` in the code below (note that this Parquet file only has 2 records (`Document`) in total).\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 442
-    },
-    "id": "A8RyaU3EX4av",
-    "outputId": "9235bd5b-cca6-45e0-8244-0f58d6bdcd9b"
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/usr/local/lib/python3.6/dist-packages/struct2tensor/expression_impl/parquet.py:65: FutureWarning: The 'field_by_name' method is deprecated, use 'field' instead\n",
-      "  [arrow_schema.field_by_name(name) for name in arrow_schema.names]))\n",
-      "/usr/local/lib/python3.6/dist-packages/struct2tensor/expression_impl/parquet.py:396: FutureWarning: The 'field_by_name' method is deprecated, use 'field' instead\n",
-      "  for step in curr_steps_as_set\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "============================\n",
-      "Schema of new_field prensor: \n",
-      "RootNodeTensor root\n",
-      "  repeated ChildNodeTensor Name\n",
-      "    repeated \u003cdtype: 'int64'\u003e new_field\n",
-      "\n",
-      "Sparse tensor representation: \n",
-      "{Name.new_field: SparseTensor(values=[20, 40, 60, 20, 40, 60, 20, 40, 60], dense_shape=[1, 3, 3], \n",
-      " indices=[[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 1, 0], [0, 1, 1], [0, 1, 2], [0, 2, 0], [0, 2, 1], [0, 2, 2]])}\n",
-      "\n",
-      "============================\n",
-      "Schema of new_field prensor: \n",
-      "RootNodeTensor root\n",
-      "  repeated ChildNodeTensor Name\n",
-      "    repeated \u003cdtype: 'int64'\u003e new_field\n",
-      "\n",
-      "Sparse tensor representation: \n",
-      "{Name.new_field: SparseTensor(values=[80], dense_shape=[1, 1, 1], \n",
-      " indices=[[0, 0, 0]])}\n",
-      "\n",
-      "============================\n"
-     ]
-    }
-   ],
-   "source": [
-    "#@test {\"skip\": true} \n",
-    "\n",
-    "from struct2tensor import expression_impl\n",
-    "\n",
-    "filenames = [\"dremel_example.parquet\"]\n",
-    "batch_size = 1\n",
-    "\n",
-    "exp = s2t.expression_impl.parquet.create_expression_from_parquet_file(filenames)\n",
-    "new_exp = exp.promote_and_broadcast({\"new_field\": \"Links.Forward\"}, \"Name\")\n",
-    "proj_exp = new_exp.project([\"Name.new_field\"])\n",
-    "proj_exp_needed = exp.project([\"Name.Url\"])\n",
-    "# Please note that currently, proj_exp_needed needs to be passed into calculate.\n",
-    "# This is due to the way data is stored in parquet (values and repetition \u0026\n",
-    "# definition levels). To construct the node for \"Name\", we need to read the\n",
-    "# values of a column containing \"Name\".\n",
-    "pqds = s2t.expression_impl.parquet.calculate_parquet_values(\n",
-    "    [proj_exp, proj_exp_needed], exp, filenames, batch_size)\n",
-    "\n",
-    "for prensors in pqds:\n",
-    "  new_field_prensor = prensors[0]\n",
-    "  print(\"============================\")\n",
-    "  print(\"Schema of new_field prensor: \")\n",
-    "  print(new_field_prensor)\n",
-    "  print(\"\\nSparse tensor representation: \")\n",
-    "  pretty.pprint(new_field_prensor)\n",
-    "print(\"============================\")"
-   ]
-  }
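For reference, the `batch_size = 2` variant suggested above would look like this. Since the file holds exactly two `Document` records, the dataset should then yield a single prensor batch (a sketch under that assumption):

```python
# A sketch: the same pipeline, batched two Documents at a time.
pqds_2 = s2t.expression_impl.parquet.calculate_parquet_values(
    [proj_exp, proj_exp_needed], exp, filenames, 2)

for prensors in pqds_2:
  # One batch containing both Documents' Name.new_field values.
  pretty.pprint(prensors[0])
```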
- ],
- "metadata": {
-  "colab": {
-   "collapsed_sections": [],
-   "last_runtime": {
-    "kind": "private"
-   },
-   "name": "struct2tensor: ML on structured data",
-   "provenance": []
-  },
-  "kernelspec": {
-   "display_name": "Python 3",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
diff --git a/g3doc/api_docs/python/expression_impl.md b/g3doc/api_docs/python/expression_impl.md
deleted file mode 100644
index 40da37a..0000000
--- a/g3doc/api_docs/python/expression_impl.md
+++ /dev/null
@@ -1,70 +0,0 @@
-description: Import all modules in expression_impl.
-
-# Module: expression_impl
-
-Import all modules in expression_impl.
-
-The modules in this file should be accessed like the following:
-
-```
-import struct2tensor as s2t
-from struct2tensor import expression_impl
-
-s2t.expression_impl.apply_schema
-```
-
-## Modules
-
-[`apply_schema`](./expression_impl/apply_schema.md) module: Apply a schema to an expression.
-
-[`broadcast`](./expression_impl/broadcast.md) module: Methods for broadcasting a path in a tree.
-
-[`depth_limit`](./expression_impl/depth_limit.md) module: Caps the depth of an expression.
-
-[`filter_expression`](./expression_impl/filter_expression.md) module: Create a new expression that is a filtered version of an original one.
-
-[`index`](./expression_impl/index.md) module: get_positional_index and get_index_from_end methods.
-
-[`map_prensor`](./expression_impl/map_prensor.md) module: Arbitrary operations from sparse and ragged tensors to a leaf field.
-
-[`map_prensor_to_prensor`](./expression_impl/map_prensor_to_prensor.md) module: Arbitrary operations from prensors to prensors in an expression.
-
-[`map_values`](./expression_impl/map_values.md) module: Maps the values of various leaves of the same child to a single result.
-
-[`parquet`](./expression_impl/parquet.md) module: Apache Parquet Dataset.
-
-[`placeholder`](./expression_impl/placeholder.md) module: Placeholder expression.
-
-[`project`](./expression_impl/project.md) module: project selects a subtree of an expression.
-
-[`promote`](./expression_impl/promote.md) module: Promote an expression to be a child of its grandparent.
-
-[`promote_and_broadcast`](./expression_impl/promote_and_broadcast.md) module: promote_and_broadcast a set of nodes.
-
-[`proto`](./expression_impl/proto.md) module: Expressions to parse a proto.
-
-[`reroot`](./expression_impl/reroot.md) module: Reroot to a subtree, maintaining an input proto index.
-
-[`size`](./expression_impl/size.md) module: Functions for creating new size or has expression.
-
-[`slice_expression`](./expression_impl/slice_expression.md) module: Implementation of slice.
- diff --git a/g3doc/api_docs/python/expression_impl/_api_cache.json b/g3doc/api_docs/python/expression_impl/_api_cache.json deleted file mode 100644 index 0db22bd..0000000 --- a/g3doc/api_docs/python/expression_impl/_api_cache.json +++ /dev/null @@ -1,429 +0,0 @@ -{ - "duplicate_of": { - "expression_impl.parquet.ParquetDataset.__eq__": "expression_impl.map_prensor_to_prensor.Schema.__eq__", - "expression_impl.parquet.ParquetDataset.__ge__": "expression_impl.map_prensor_to_prensor.Schema.__ge__", - "expression_impl.parquet.ParquetDataset.__gt__": "expression_impl.map_prensor_to_prensor.Schema.__gt__", - "expression_impl.parquet.ParquetDataset.__le__": "expression_impl.map_prensor_to_prensor.Schema.__le__", - "expression_impl.parquet.ParquetDataset.__lt__": "expression_impl.map_prensor_to_prensor.Schema.__lt__", - "expression_impl.parquet.ParquetDataset.__ne__": "expression_impl.map_prensor_to_prensor.Schema.__ne__", - "expression_impl.parquet.ParquetDataset.__new__": "expression_impl.map_prensor_to_prensor.Schema.__new__", - "expression_impl.parquet.ParquetDataset.__nonzero__": "expression_impl.parquet.ParquetDataset.__bool__", - "expression_impl.promote.PromoteChildExpression.__ge__": "expression_impl.map_prensor_to_prensor.Schema.__ge__", - "expression_impl.promote.PromoteChildExpression.__gt__": "expression_impl.map_prensor_to_prensor.Schema.__gt__", - "expression_impl.promote.PromoteChildExpression.__le__": "expression_impl.map_prensor_to_prensor.Schema.__le__", - "expression_impl.promote.PromoteChildExpression.__lt__": "expression_impl.map_prensor_to_prensor.Schema.__lt__", - "expression_impl.promote.PromoteChildExpression.__ne__": "expression_impl.map_prensor_to_prensor.Schema.__ne__", - "expression_impl.promote.PromoteChildExpression.__new__": "expression_impl.map_prensor_to_prensor.Schema.__new__", - "expression_impl.promote.PromoteExpression.__eq__": "expression_impl.promote.PromoteChildExpression.__eq__", - "expression_impl.promote.PromoteExpression.__ge__": "expression_impl.map_prensor_to_prensor.Schema.__ge__", - "expression_impl.promote.PromoteExpression.__gt__": "expression_impl.map_prensor_to_prensor.Schema.__gt__", - "expression_impl.promote.PromoteExpression.__le__": "expression_impl.map_prensor_to_prensor.Schema.__le__", - "expression_impl.promote.PromoteExpression.__lt__": "expression_impl.map_prensor_to_prensor.Schema.__lt__", - "expression_impl.promote.PromoteExpression.__ne__": "expression_impl.map_prensor_to_prensor.Schema.__ne__", - "expression_impl.promote.PromoteExpression.__new__": "expression_impl.map_prensor_to_prensor.Schema.__new__", - "expression_impl.promote.PromoteExpression.apply": "expression_impl.promote.PromoteChildExpression.apply", - "expression_impl.promote.PromoteExpression.apply_schema": "expression_impl.promote.PromoteChildExpression.apply_schema", - "expression_impl.promote.PromoteExpression.broadcast": "expression_impl.promote.PromoteChildExpression.broadcast", - "expression_impl.promote.PromoteExpression.cogroup_by_index": "expression_impl.promote.PromoteChildExpression.cogroup_by_index", - "expression_impl.promote.PromoteExpression.create_has_field": "expression_impl.promote.PromoteChildExpression.create_has_field", - "expression_impl.promote.PromoteExpression.create_proto_index": "expression_impl.promote.PromoteChildExpression.create_proto_index", - "expression_impl.promote.PromoteExpression.create_size_field": "expression_impl.promote.PromoteChildExpression.create_size_field", - "expression_impl.promote.PromoteExpression.get_child": 
"expression_impl.promote.PromoteChildExpression.get_child", - "expression_impl.promote.PromoteExpression.get_child_or_error": "expression_impl.promote.PromoteChildExpression.get_child_or_error", - "expression_impl.promote.PromoteExpression.get_descendant": "expression_impl.promote.PromoteChildExpression.get_descendant", - "expression_impl.promote.PromoteExpression.get_descendant_or_error": "expression_impl.promote.PromoteChildExpression.get_descendant_or_error", - "expression_impl.promote.PromoteExpression.get_known_children": "expression_impl.promote.PromoteChildExpression.get_known_children", - "expression_impl.promote.PromoteExpression.get_known_descendants": "expression_impl.promote.PromoteChildExpression.get_known_descendants", - "expression_impl.promote.PromoteExpression.get_paths_with_schema": "expression_impl.promote.PromoteChildExpression.get_paths_with_schema", - "expression_impl.promote.PromoteExpression.get_schema": "expression_impl.promote.PromoteChildExpression.get_schema", - "expression_impl.promote.PromoteExpression.is_leaf": "expression_impl.promote.PromoteChildExpression.is_leaf", - "expression_impl.promote.PromoteExpression.is_repeated": "expression_impl.promote.PromoteChildExpression.is_repeated", - "expression_impl.promote.PromoteExpression.map_field_values": "expression_impl.promote.PromoteChildExpression.map_field_values", - "expression_impl.promote.PromoteExpression.map_ragged_tensors": "expression_impl.promote.PromoteChildExpression.map_ragged_tensors", - "expression_impl.promote.PromoteExpression.map_sparse_tensors": "expression_impl.promote.PromoteChildExpression.map_sparse_tensors", - "expression_impl.promote.PromoteExpression.project": "expression_impl.promote.PromoteChildExpression.project", - "expression_impl.promote.PromoteExpression.promote": "expression_impl.promote.PromoteChildExpression.promote", - "expression_impl.promote.PromoteExpression.promote_and_broadcast": "expression_impl.promote.PromoteChildExpression.promote_and_broadcast", - "expression_impl.promote.PromoteExpression.reroot": "expression_impl.promote.PromoteChildExpression.reroot", - "expression_impl.promote.PromoteExpression.schema_feature": "expression_impl.promote.PromoteChildExpression.schema_feature", - "expression_impl.promote.PromoteExpression.schema_string": "expression_impl.promote.PromoteChildExpression.schema_string", - "expression_impl.promote.PromoteExpression.slice": "expression_impl.promote.PromoteChildExpression.slice", - "expression_impl.promote.PromoteExpression.truncate": "expression_impl.promote.PromoteChildExpression.truncate", - "expression_impl.promote.PromoteExpression.type": "expression_impl.promote.PromoteChildExpression.type", - "expression_impl.proto.DescriptorPool.__eq__": "expression_impl.map_prensor_to_prensor.Schema.__eq__", - "expression_impl.proto.DescriptorPool.__ge__": "expression_impl.map_prensor_to_prensor.Schema.__ge__", - "expression_impl.proto.DescriptorPool.__gt__": "expression_impl.map_prensor_to_prensor.Schema.__gt__", - "expression_impl.proto.DescriptorPool.__le__": "expression_impl.map_prensor_to_prensor.Schema.__le__", - "expression_impl.proto.DescriptorPool.__lt__": "expression_impl.map_prensor_to_prensor.Schema.__lt__", - "expression_impl.proto.DescriptorPool.__ne__": "expression_impl.map_prensor_to_prensor.Schema.__ne__", - "expression_impl.proto.TransformFn.__eq__": "expression_impl.map_prensor_to_prensor.Schema.__eq__", - "expression_impl.proto.TransformFn.__ge__": "expression_impl.map_prensor_to_prensor.Schema.__ge__", - 
"expression_impl.proto.TransformFn.__gt__": "expression_impl.map_prensor_to_prensor.Schema.__gt__", - "expression_impl.proto.TransformFn.__le__": "expression_impl.map_prensor_to_prensor.Schema.__le__", - "expression_impl.proto.TransformFn.__lt__": "expression_impl.map_prensor_to_prensor.Schema.__lt__", - "expression_impl.proto.TransformFn.__ne__": "expression_impl.map_prensor_to_prensor.Schema.__ne__", - "expression_impl.size.SizeExpression.__eq__": "expression_impl.promote.PromoteChildExpression.__eq__", - "expression_impl.size.SizeExpression.__ge__": "expression_impl.map_prensor_to_prensor.Schema.__ge__", - "expression_impl.size.SizeExpression.__gt__": "expression_impl.map_prensor_to_prensor.Schema.__gt__", - "expression_impl.size.SizeExpression.__le__": "expression_impl.map_prensor_to_prensor.Schema.__le__", - "expression_impl.size.SizeExpression.__lt__": "expression_impl.map_prensor_to_prensor.Schema.__lt__", - "expression_impl.size.SizeExpression.__ne__": "expression_impl.map_prensor_to_prensor.Schema.__ne__", - "expression_impl.size.SizeExpression.__new__": "expression_impl.map_prensor_to_prensor.Schema.__new__", - "expression_impl.size.SizeExpression.apply": "expression_impl.promote.PromoteChildExpression.apply", - "expression_impl.size.SizeExpression.apply_schema": "expression_impl.promote.PromoteChildExpression.apply_schema", - "expression_impl.size.SizeExpression.broadcast": "expression_impl.promote.PromoteChildExpression.broadcast", - "expression_impl.size.SizeExpression.cogroup_by_index": "expression_impl.promote.PromoteChildExpression.cogroup_by_index", - "expression_impl.size.SizeExpression.create_has_field": "expression_impl.promote.PromoteChildExpression.create_has_field", - "expression_impl.size.SizeExpression.create_proto_index": "expression_impl.promote.PromoteChildExpression.create_proto_index", - "expression_impl.size.SizeExpression.create_size_field": "expression_impl.promote.PromoteChildExpression.create_size_field", - "expression_impl.size.SizeExpression.get_child": "expression_impl.promote.PromoteChildExpression.get_child", - "expression_impl.size.SizeExpression.get_child_or_error": "expression_impl.promote.PromoteChildExpression.get_child_or_error", - "expression_impl.size.SizeExpression.get_descendant": "expression_impl.promote.PromoteChildExpression.get_descendant", - "expression_impl.size.SizeExpression.get_descendant_or_error": "expression_impl.promote.PromoteChildExpression.get_descendant_or_error", - "expression_impl.size.SizeExpression.get_known_children": "expression_impl.promote.PromoteChildExpression.get_known_children", - "expression_impl.size.SizeExpression.get_known_descendants": "expression_impl.promote.PromoteChildExpression.get_known_descendants", - "expression_impl.size.SizeExpression.get_paths_with_schema": "expression_impl.promote.PromoteChildExpression.get_paths_with_schema", - "expression_impl.size.SizeExpression.get_schema": "expression_impl.promote.PromoteChildExpression.get_schema", - "expression_impl.size.SizeExpression.is_leaf": "expression_impl.promote.PromoteChildExpression.is_leaf", - "expression_impl.size.SizeExpression.is_repeated": "expression_impl.promote.PromoteChildExpression.is_repeated", - "expression_impl.size.SizeExpression.known_field_names": "expression_impl.promote.PromoteExpression.known_field_names", - "expression_impl.size.SizeExpression.map_field_values": "expression_impl.promote.PromoteChildExpression.map_field_values", - "expression_impl.size.SizeExpression.map_ragged_tensors": 
"expression_impl.promote.PromoteChildExpression.map_ragged_tensors", - "expression_impl.size.SizeExpression.map_sparse_tensors": "expression_impl.promote.PromoteChildExpression.map_sparse_tensors", - "expression_impl.size.SizeExpression.project": "expression_impl.promote.PromoteChildExpression.project", - "expression_impl.size.SizeExpression.promote": "expression_impl.promote.PromoteChildExpression.promote", - "expression_impl.size.SizeExpression.promote_and_broadcast": "expression_impl.promote.PromoteChildExpression.promote_and_broadcast", - "expression_impl.size.SizeExpression.reroot": "expression_impl.promote.PromoteChildExpression.reroot", - "expression_impl.size.SizeExpression.schema_feature": "expression_impl.promote.PromoteChildExpression.schema_feature", - "expression_impl.size.SizeExpression.schema_string": "expression_impl.promote.PromoteChildExpression.schema_string", - "expression_impl.size.SizeExpression.slice": "expression_impl.promote.PromoteChildExpression.slice", - "expression_impl.size.SizeExpression.truncate": "expression_impl.promote.PromoteChildExpression.truncate", - "expression_impl.size.SizeExpression.type": "expression_impl.promote.PromoteChildExpression.type" - }, - "is_fragment": { - "expression_impl": false, - "expression_impl.apply_schema": false, - "expression_impl.apply_schema.apply_schema": false, - "expression_impl.broadcast": false, - "expression_impl.broadcast.broadcast": false, - "expression_impl.broadcast.broadcast_anonymous": false, - "expression_impl.depth_limit": false, - "expression_impl.depth_limit.limit_depth": false, - "expression_impl.filter_expression": false, - "expression_impl.filter_expression.filter_by_child": false, - "expression_impl.filter_expression.filter_by_sibling": false, - "expression_impl.index": false, - "expression_impl.index.get_index_from_end": false, - "expression_impl.index.get_positional_index": false, - "expression_impl.map_prensor": false, - "expression_impl.map_prensor.map_ragged_tensor": false, - "expression_impl.map_prensor.map_sparse_tensor": false, - "expression_impl.map_prensor_to_prensor": false, - "expression_impl.map_prensor_to_prensor.Schema": false, - "expression_impl.map_prensor_to_prensor.Schema.__eq__": true, - "expression_impl.map_prensor_to_prensor.Schema.__ge__": true, - "expression_impl.map_prensor_to_prensor.Schema.__gt__": true, - "expression_impl.map_prensor_to_prensor.Schema.__init__": true, - "expression_impl.map_prensor_to_prensor.Schema.__le__": true, - "expression_impl.map_prensor_to_prensor.Schema.__lt__": true, - "expression_impl.map_prensor_to_prensor.Schema.__ne__": true, - "expression_impl.map_prensor_to_prensor.Schema.__new__": true, - "expression_impl.map_prensor_to_prensor.Schema.get_child": true, - "expression_impl.map_prensor_to_prensor.Schema.is_repeated": true, - "expression_impl.map_prensor_to_prensor.Schema.known_field_names": true, - "expression_impl.map_prensor_to_prensor.Schema.schema_feature": true, - "expression_impl.map_prensor_to_prensor.Schema.type": true, - "expression_impl.map_prensor_to_prensor.create_schema": false, - "expression_impl.map_prensor_to_prensor.map_prensor_to_prensor": false, - "expression_impl.map_values": false, - "expression_impl.map_values.map_many_values": false, - "expression_impl.map_values.map_values": false, - "expression_impl.map_values.map_values_anonymous": false, - "expression_impl.parquet": false, - "expression_impl.parquet.ParquetDataset": false, - "expression_impl.parquet.ParquetDataset.__bool__": true, - 
"expression_impl.parquet.ParquetDataset.__eq__": true, - "expression_impl.parquet.ParquetDataset.__ge__": true, - "expression_impl.parquet.ParquetDataset.__gt__": true, - "expression_impl.parquet.ParquetDataset.__init__": true, - "expression_impl.parquet.ParquetDataset.__iter__": true, - "expression_impl.parquet.ParquetDataset.__le__": true, - "expression_impl.parquet.ParquetDataset.__len__": true, - "expression_impl.parquet.ParquetDataset.__lt__": true, - "expression_impl.parquet.ParquetDataset.__ne__": true, - "expression_impl.parquet.ParquetDataset.__new__": true, - "expression_impl.parquet.ParquetDataset.__nonzero__": true, - "expression_impl.parquet.ParquetDataset.apply": true, - "expression_impl.parquet.ParquetDataset.as_numpy_iterator": true, - "expression_impl.parquet.ParquetDataset.batch": true, - "expression_impl.parquet.ParquetDataset.cache": true, - "expression_impl.parquet.ParquetDataset.cardinality": true, - "expression_impl.parquet.ParquetDataset.concatenate": true, - "expression_impl.parquet.ParquetDataset.element_spec": true, - "expression_impl.parquet.ParquetDataset.enumerate": true, - "expression_impl.parquet.ParquetDataset.filter": true, - "expression_impl.parquet.ParquetDataset.filter_with_legacy_function": true, - "expression_impl.parquet.ParquetDataset.flat_map": true, - "expression_impl.parquet.ParquetDataset.from_generator": true, - "expression_impl.parquet.ParquetDataset.from_sparse_tensor_slices": true, - "expression_impl.parquet.ParquetDataset.from_tensor_slices": true, - "expression_impl.parquet.ParquetDataset.from_tensors": true, - "expression_impl.parquet.ParquetDataset.get_single_element": true, - "expression_impl.parquet.ParquetDataset.group_by_window": true, - "expression_impl.parquet.ParquetDataset.interleave": true, - "expression_impl.parquet.ParquetDataset.list_files": true, - "expression_impl.parquet.ParquetDataset.make_initializable_iterator": true, - "expression_impl.parquet.ParquetDataset.make_one_shot_iterator": true, - "expression_impl.parquet.ParquetDataset.map": true, - "expression_impl.parquet.ParquetDataset.map_with_legacy_function": true, - "expression_impl.parquet.ParquetDataset.options": true, - "expression_impl.parquet.ParquetDataset.output_classes": true, - "expression_impl.parquet.ParquetDataset.output_shapes": true, - "expression_impl.parquet.ParquetDataset.output_types": true, - "expression_impl.parquet.ParquetDataset.padded_batch": true, - "expression_impl.parquet.ParquetDataset.prefetch": true, - "expression_impl.parquet.ParquetDataset.range": true, - "expression_impl.parquet.ParquetDataset.reduce": true, - "expression_impl.parquet.ParquetDataset.repeat": true, - "expression_impl.parquet.ParquetDataset.shard": true, - "expression_impl.parquet.ParquetDataset.shuffle": true, - "expression_impl.parquet.ParquetDataset.skip": true, - "expression_impl.parquet.ParquetDataset.take": true, - "expression_impl.parquet.ParquetDataset.unbatch": true, - "expression_impl.parquet.ParquetDataset.window": true, - "expression_impl.parquet.ParquetDataset.with_options": true, - "expression_impl.parquet.ParquetDataset.zip": true, - "expression_impl.parquet.calculate_parquet_values": false, - "expression_impl.parquet.create_expression_from_parquet_file": false, - "expression_impl.placeholder": false, - "expression_impl.placeholder.create_expression_from_schema": false, - "expression_impl.placeholder.get_placeholder_paths_from_graph": false, - "expression_impl.project": false, - "expression_impl.project.project": false, - "expression_impl.promote": false, - 
"expression_impl.promote.PromoteChildExpression": false, - "expression_impl.promote.PromoteChildExpression.__eq__": true, - "expression_impl.promote.PromoteChildExpression.__ge__": true, - "expression_impl.promote.PromoteChildExpression.__gt__": true, - "expression_impl.promote.PromoteChildExpression.__init__": true, - "expression_impl.promote.PromoteChildExpression.__le__": true, - "expression_impl.promote.PromoteChildExpression.__lt__": true, - "expression_impl.promote.PromoteChildExpression.__ne__": true, - "expression_impl.promote.PromoteChildExpression.__new__": true, - "expression_impl.promote.PromoteChildExpression.apply": true, - "expression_impl.promote.PromoteChildExpression.apply_schema": true, - "expression_impl.promote.PromoteChildExpression.broadcast": true, - "expression_impl.promote.PromoteChildExpression.calculate": true, - "expression_impl.promote.PromoteChildExpression.calculation_equal": true, - "expression_impl.promote.PromoteChildExpression.calculation_is_identity": true, - "expression_impl.promote.PromoteChildExpression.cogroup_by_index": true, - "expression_impl.promote.PromoteChildExpression.create_has_field": true, - "expression_impl.promote.PromoteChildExpression.create_proto_index": true, - "expression_impl.promote.PromoteChildExpression.create_size_field": true, - "expression_impl.promote.PromoteChildExpression.get_child": true, - "expression_impl.promote.PromoteChildExpression.get_child_or_error": true, - "expression_impl.promote.PromoteChildExpression.get_descendant": true, - "expression_impl.promote.PromoteChildExpression.get_descendant_or_error": true, - "expression_impl.promote.PromoteChildExpression.get_known_children": true, - "expression_impl.promote.PromoteChildExpression.get_known_descendants": true, - "expression_impl.promote.PromoteChildExpression.get_paths_with_schema": true, - "expression_impl.promote.PromoteChildExpression.get_schema": true, - "expression_impl.promote.PromoteChildExpression.get_source_expressions": true, - "expression_impl.promote.PromoteChildExpression.is_leaf": true, - "expression_impl.promote.PromoteChildExpression.is_repeated": true, - "expression_impl.promote.PromoteChildExpression.known_field_names": true, - "expression_impl.promote.PromoteChildExpression.map_field_values": true, - "expression_impl.promote.PromoteChildExpression.map_ragged_tensors": true, - "expression_impl.promote.PromoteChildExpression.map_sparse_tensors": true, - "expression_impl.promote.PromoteChildExpression.project": true, - "expression_impl.promote.PromoteChildExpression.promote": true, - "expression_impl.promote.PromoteChildExpression.promote_and_broadcast": true, - "expression_impl.promote.PromoteChildExpression.reroot": true, - "expression_impl.promote.PromoteChildExpression.schema_feature": true, - "expression_impl.promote.PromoteChildExpression.schema_string": true, - "expression_impl.promote.PromoteChildExpression.slice": true, - "expression_impl.promote.PromoteChildExpression.truncate": true, - "expression_impl.promote.PromoteChildExpression.type": true, - "expression_impl.promote.PromoteExpression": false, - "expression_impl.promote.PromoteExpression.__eq__": true, - "expression_impl.promote.PromoteExpression.__ge__": true, - "expression_impl.promote.PromoteExpression.__gt__": true, - "expression_impl.promote.PromoteExpression.__init__": true, - "expression_impl.promote.PromoteExpression.__le__": true, - "expression_impl.promote.PromoteExpression.__lt__": true, - "expression_impl.promote.PromoteExpression.__ne__": true, - 
"expression_impl.promote.PromoteExpression.__new__": true, - "expression_impl.promote.PromoteExpression.apply": true, - "expression_impl.promote.PromoteExpression.apply_schema": true, - "expression_impl.promote.PromoteExpression.broadcast": true, - "expression_impl.promote.PromoteExpression.calculate": true, - "expression_impl.promote.PromoteExpression.calculation_equal": true, - "expression_impl.promote.PromoteExpression.calculation_is_identity": true, - "expression_impl.promote.PromoteExpression.cogroup_by_index": true, - "expression_impl.promote.PromoteExpression.create_has_field": true, - "expression_impl.promote.PromoteExpression.create_proto_index": true, - "expression_impl.promote.PromoteExpression.create_size_field": true, - "expression_impl.promote.PromoteExpression.get_child": true, - "expression_impl.promote.PromoteExpression.get_child_or_error": true, - "expression_impl.promote.PromoteExpression.get_descendant": true, - "expression_impl.promote.PromoteExpression.get_descendant_or_error": true, - "expression_impl.promote.PromoteExpression.get_known_children": true, - "expression_impl.promote.PromoteExpression.get_known_descendants": true, - "expression_impl.promote.PromoteExpression.get_paths_with_schema": true, - "expression_impl.promote.PromoteExpression.get_schema": true, - "expression_impl.promote.PromoteExpression.get_source_expressions": true, - "expression_impl.promote.PromoteExpression.is_leaf": true, - "expression_impl.promote.PromoteExpression.is_repeated": true, - "expression_impl.promote.PromoteExpression.known_field_names": true, - "expression_impl.promote.PromoteExpression.map_field_values": true, - "expression_impl.promote.PromoteExpression.map_ragged_tensors": true, - "expression_impl.promote.PromoteExpression.map_sparse_tensors": true, - "expression_impl.promote.PromoteExpression.project": true, - "expression_impl.promote.PromoteExpression.promote": true, - "expression_impl.promote.PromoteExpression.promote_and_broadcast": true, - "expression_impl.promote.PromoteExpression.reroot": true, - "expression_impl.promote.PromoteExpression.schema_feature": true, - "expression_impl.promote.PromoteExpression.schema_string": true, - "expression_impl.promote.PromoteExpression.slice": true, - "expression_impl.promote.PromoteExpression.truncate": true, - "expression_impl.promote.PromoteExpression.type": true, - "expression_impl.promote.promote": false, - "expression_impl.promote.promote_anonymous": false, - "expression_impl.promote_and_broadcast": false, - "expression_impl.promote_and_broadcast.promote_and_broadcast": false, - "expression_impl.promote_and_broadcast.promote_and_broadcast_anonymous": false, - "expression_impl.proto": false, - "expression_impl.proto.DescriptorPool": false, - "expression_impl.proto.DescriptorPool.Add": true, - "expression_impl.proto.DescriptorPool.AddDescriptor": true, - "expression_impl.proto.DescriptorPool.AddEnumDescriptor": true, - "expression_impl.proto.DescriptorPool.AddExtensionDescriptor": true, - "expression_impl.proto.DescriptorPool.AddFileDescriptor": true, - "expression_impl.proto.DescriptorPool.AddSerializedFile": true, - "expression_impl.proto.DescriptorPool.AddServiceDescriptor": true, - "expression_impl.proto.DescriptorPool.FindAllExtensions": true, - "expression_impl.proto.DescriptorPool.FindEnumTypeByName": true, - "expression_impl.proto.DescriptorPool.FindExtensionByName": true, - "expression_impl.proto.DescriptorPool.FindExtensionByNumber": true, - "expression_impl.proto.DescriptorPool.FindFieldByName": true, - 
"expression_impl.proto.DescriptorPool.FindFileByName": true, - "expression_impl.proto.DescriptorPool.FindFileContainingSymbol": true, - "expression_impl.proto.DescriptorPool.FindMessageTypeByName": true, - "expression_impl.proto.DescriptorPool.FindMethodByName": true, - "expression_impl.proto.DescriptorPool.FindOneofByName": true, - "expression_impl.proto.DescriptorPool.FindServiceByName": true, - "expression_impl.proto.DescriptorPool.__eq__": true, - "expression_impl.proto.DescriptorPool.__ge__": true, - "expression_impl.proto.DescriptorPool.__gt__": true, - "expression_impl.proto.DescriptorPool.__init__": true, - "expression_impl.proto.DescriptorPool.__le__": true, - "expression_impl.proto.DescriptorPool.__lt__": true, - "expression_impl.proto.DescriptorPool.__ne__": true, - "expression_impl.proto.DescriptorPool.__new__": true, - "expression_impl.proto.FileDescriptorSet": false, - "expression_impl.proto.FileDescriptorSet.ByteSize": true, - "expression_impl.proto.FileDescriptorSet.Clear": true, - "expression_impl.proto.FileDescriptorSet.ClearExtension": true, - "expression_impl.proto.FileDescriptorSet.ClearField": true, - "expression_impl.proto.FileDescriptorSet.CopyFrom": true, - "expression_impl.proto.FileDescriptorSet.DESCRIPTOR": true, - "expression_impl.proto.FileDescriptorSet.DiscardUnknownFields": true, - "expression_impl.proto.FileDescriptorSet.Extensions": true, - "expression_impl.proto.FileDescriptorSet.FindInitializationErrors": true, - "expression_impl.proto.FileDescriptorSet.FromString": true, - "expression_impl.proto.FileDescriptorSet.HasExtension": true, - "expression_impl.proto.FileDescriptorSet.HasField": true, - "expression_impl.proto.FileDescriptorSet.IsInitialized": true, - "expression_impl.proto.FileDescriptorSet.ListFields": true, - "expression_impl.proto.FileDescriptorSet.MergeFrom": true, - "expression_impl.proto.FileDescriptorSet.MergeFromString": true, - "expression_impl.proto.FileDescriptorSet.ParseFromString": true, - "expression_impl.proto.FileDescriptorSet.RegisterExtension": true, - "expression_impl.proto.FileDescriptorSet.SerializePartialToString": true, - "expression_impl.proto.FileDescriptorSet.SerializeToString": true, - "expression_impl.proto.FileDescriptorSet.SetInParent": true, - "expression_impl.proto.FileDescriptorSet.UnknownFields": true, - "expression_impl.proto.FileDescriptorSet.WhichOneof": true, - "expression_impl.proto.FileDescriptorSet.__eq__": true, - "expression_impl.proto.FileDescriptorSet.__ge__": true, - "expression_impl.proto.FileDescriptorSet.__gt__": true, - "expression_impl.proto.FileDescriptorSet.__init__": true, - "expression_impl.proto.FileDescriptorSet.__le__": true, - "expression_impl.proto.FileDescriptorSet.__lt__": true, - "expression_impl.proto.FileDescriptorSet.__ne__": true, - "expression_impl.proto.FileDescriptorSet.__new__": true, - "expression_impl.proto.FileDescriptorSet.file": true, - "expression_impl.proto.ProtoExpression": false, - "expression_impl.proto.TransformFn": false, - "expression_impl.proto.TransformFn.__call__": true, - "expression_impl.proto.TransformFn.__eq__": true, - "expression_impl.proto.TransformFn.__ge__": true, - "expression_impl.proto.TransformFn.__gt__": true, - "expression_impl.proto.TransformFn.__init__": true, - "expression_impl.proto.TransformFn.__le__": true, - "expression_impl.proto.TransformFn.__lt__": true, - "expression_impl.proto.TransformFn.__ne__": true, - "expression_impl.proto.TransformFn.__new__": true, - "expression_impl.proto.create_expression_from_file_descriptor_set": false, - 
"expression_impl.proto.create_expression_from_proto": false, - "expression_impl.proto.create_transformed_field": false, - "expression_impl.proto.is_proto_expression": false, - "expression_impl.reroot": false, - "expression_impl.reroot.create_proto_index_field": false, - "expression_impl.reroot.reroot": false, - "expression_impl.size": false, - "expression_impl.size.SizeExpression": false, - "expression_impl.size.SizeExpression.__eq__": true, - "expression_impl.size.SizeExpression.__ge__": true, - "expression_impl.size.SizeExpression.__gt__": true, - "expression_impl.size.SizeExpression.__init__": true, - "expression_impl.size.SizeExpression.__le__": true, - "expression_impl.size.SizeExpression.__lt__": true, - "expression_impl.size.SizeExpression.__ne__": true, - "expression_impl.size.SizeExpression.__new__": true, - "expression_impl.size.SizeExpression.apply": true, - "expression_impl.size.SizeExpression.apply_schema": true, - "expression_impl.size.SizeExpression.broadcast": true, - "expression_impl.size.SizeExpression.calculate": true, - "expression_impl.size.SizeExpression.calculation_equal": true, - "expression_impl.size.SizeExpression.calculation_is_identity": true, - "expression_impl.size.SizeExpression.cogroup_by_index": true, - "expression_impl.size.SizeExpression.create_has_field": true, - "expression_impl.size.SizeExpression.create_proto_index": true, - "expression_impl.size.SizeExpression.create_size_field": true, - "expression_impl.size.SizeExpression.get_child": true, - "expression_impl.size.SizeExpression.get_child_or_error": true, - "expression_impl.size.SizeExpression.get_descendant": true, - "expression_impl.size.SizeExpression.get_descendant_or_error": true, - "expression_impl.size.SizeExpression.get_known_children": true, - "expression_impl.size.SizeExpression.get_known_descendants": true, - "expression_impl.size.SizeExpression.get_paths_with_schema": true, - "expression_impl.size.SizeExpression.get_schema": true, - "expression_impl.size.SizeExpression.get_source_expressions": true, - "expression_impl.size.SizeExpression.is_leaf": true, - "expression_impl.size.SizeExpression.is_repeated": true, - "expression_impl.size.SizeExpression.known_field_names": true, - "expression_impl.size.SizeExpression.map_field_values": true, - "expression_impl.size.SizeExpression.map_ragged_tensors": true, - "expression_impl.size.SizeExpression.map_sparse_tensors": true, - "expression_impl.size.SizeExpression.project": true, - "expression_impl.size.SizeExpression.promote": true, - "expression_impl.size.SizeExpression.promote_and_broadcast": true, - "expression_impl.size.SizeExpression.reroot": true, - "expression_impl.size.SizeExpression.schema_feature": true, - "expression_impl.size.SizeExpression.schema_string": true, - "expression_impl.size.SizeExpression.slice": true, - "expression_impl.size.SizeExpression.truncate": true, - "expression_impl.size.SizeExpression.type": true, - "expression_impl.size.has": false, - "expression_impl.size.size": false, - "expression_impl.size.size_anonymous": false, - "expression_impl.slice_expression": false, - "expression_impl.slice_expression.IndexValue": false, - "expression_impl.slice_expression.slice_expression": false - }, - "py_module_names": [ - "expression_impl" - ], - "site_link": null -} diff --git a/g3doc/api_docs/python/expression_impl/_toc.yaml b/g3doc/api_docs/python/expression_impl/_toc.yaml deleted file mode 100644 index b2bf7ea..0000000 --- a/g3doc/api_docs/python/expression_impl/_toc.yaml +++ /dev/null @@ -1,161 +0,0 @@ -toc: -- title: 
expression_impl - section: - - title: Overview - path: /api_docs/python/expression_impl -- title: expression_impl.apply_schema - section: - - title: Overview - path: /api_docs/python/expression_impl/apply_schema - - title: apply_schema - path: /api_docs/python/expression_impl/apply_schema/apply_schema -- title: expression_impl.broadcast - section: - - title: Overview - path: /api_docs/python/expression_impl/broadcast - - title: broadcast - path: /api_docs/python/expression_impl/broadcast/broadcast - - title: broadcast_anonymous - path: /api_docs/python/expression_impl/broadcast/broadcast_anonymous -- title: expression_impl.depth_limit - section: - - title: Overview - path: /api_docs/python/expression_impl/depth_limit - - title: limit_depth - path: /api_docs/python/expression_impl/depth_limit/limit_depth -- title: expression_impl.filter_expression - section: - - title: Overview - path: /api_docs/python/expression_impl/filter_expression - - title: filter_by_child - path: /api_docs/python/expression_impl/filter_expression/filter_by_child - - title: filter_by_sibling - path: /api_docs/python/expression_impl/filter_expression/filter_by_sibling -- title: expression_impl.index - section: - - title: Overview - path: /api_docs/python/expression_impl/index - - title: get_index_from_end - path: /api_docs/python/expression_impl/index/get_index_from_end - - title: get_positional_index - path: /api_docs/python/expression_impl/index/get_positional_index -- title: expression_impl.map_prensor - section: - - title: Overview - path: /api_docs/python/expression_impl/map_prensor - - title: map_ragged_tensor - path: /api_docs/python/expression_impl/map_prensor/map_ragged_tensor - - title: map_sparse_tensor - path: /api_docs/python/expression_impl/map_prensor/map_sparse_tensor -- title: expression_impl.map_prensor_to_prensor - section: - - title: Overview - path: /api_docs/python/expression_impl/map_prensor_to_prensor - - title: Schema - path: /api_docs/python/expression_impl/map_prensor_to_prensor/Schema - - title: create_schema - path: /api_docs/python/expression_impl/map_prensor_to_prensor/create_schema - - title: map_prensor_to_prensor - path: /api_docs/python/expression_impl/map_prensor_to_prensor/map_prensor_to_prensor -- title: expression_impl.map_values - section: - - title: Overview - path: /api_docs/python/expression_impl/map_values - - title: map_many_values - path: /api_docs/python/expression_impl/map_values/map_many_values - - title: map_values - path: /api_docs/python/expression_impl/map_values/map_values - - title: map_values_anonymous - path: /api_docs/python/expression_impl/map_values/map_values_anonymous -- title: expression_impl.parquet - section: - - title: Overview - path: /api_docs/python/expression_impl/parquet - - title: ParquetDataset - path: /api_docs/python/expression_impl/parquet/ParquetDataset - - title: calculate_parquet_values - path: /api_docs/python/expression_impl/parquet/calculate_parquet_values - - title: create_expression_from_parquet_file - path: /api_docs/python/expression_impl/parquet/create_expression_from_parquet_file -- title: expression_impl.placeholder - section: - - title: Overview - path: /api_docs/python/expression_impl/placeholder - - title: create_expression_from_schema - path: /api_docs/python/expression_impl/placeholder/create_expression_from_schema - - title: get_placeholder_paths_from_graph - path: /api_docs/python/expression_impl/placeholder/get_placeholder_paths_from_graph -- title: expression_impl.project - section: - - title: Overview - path: 
/api_docs/python/expression_impl/project - - title: project - path: /api_docs/python/expression_impl/project/project -- title: expression_impl.promote - section: - - title: Overview - path: /api_docs/python/expression_impl/promote - - title: PromoteChildExpression - path: /api_docs/python/expression_impl/promote/PromoteChildExpression - - title: PromoteExpression - path: /api_docs/python/expression_impl/promote/PromoteExpression - - title: promote - path: /api_docs/python/expression_impl/promote/promote - - title: promote_anonymous - path: /api_docs/python/expression_impl/promote/promote_anonymous -- title: expression_impl.promote_and_broadcast - section: - - title: Overview - path: /api_docs/python/expression_impl/promote_and_broadcast - - title: promote_and_broadcast - path: /api_docs/python/expression_impl/promote_and_broadcast/promote_and_broadcast - - title: promote_and_broadcast_anonymous - path: /api_docs/python/expression_impl/promote_and_broadcast/promote_and_broadcast_anonymous -- title: expression_impl.proto - section: - - title: Overview - path: /api_docs/python/expression_impl/proto - - title: DescriptorPool - path: /api_docs/python/expression_impl/proto/DescriptorPool - - title: FileDescriptorSet - path: /api_docs/python/expression_impl/proto/FileDescriptorSet - - title: ProtoExpression - path: /api_docs/python/expression_impl/proto/ProtoExpression - - title: TransformFn - path: /api_docs/python/expression_impl/proto/TransformFn - - title: create_expression_from_file_descriptor_set - path: /api_docs/python/expression_impl/proto/create_expression_from_file_descriptor_set - - title: create_expression_from_proto - path: /api_docs/python/expression_impl/proto/create_expression_from_proto - - title: create_transformed_field - path: /api_docs/python/expression_impl/proto/create_transformed_field - - title: is_proto_expression - path: /api_docs/python/expression_impl/proto/is_proto_expression -- title: expression_impl.reroot - section: - - title: Overview - path: /api_docs/python/expression_impl/reroot - - title: create_proto_index_field - path: /api_docs/python/expression_impl/reroot/create_proto_index_field - - title: reroot - path: /api_docs/python/expression_impl/reroot/reroot -- title: expression_impl.size - section: - - title: Overview - path: /api_docs/python/expression_impl/size - - title: SizeExpression - path: /api_docs/python/expression_impl/size/SizeExpression - - title: has - path: /api_docs/python/expression_impl/size/has - - title: size - path: /api_docs/python/expression_impl/size/size - - title: size_anonymous - path: /api_docs/python/expression_impl/size/size_anonymous -- title: expression_impl.slice_expression - section: - - title: Overview - path: /api_docs/python/expression_impl/slice_expression - - title: IndexValue - path: /api_docs/python/expression_impl/slice_expression/IndexValue - - title: slice_expression - path: /api_docs/python/expression_impl/slice_expression/slice_expression diff --git a/g3doc/api_docs/python/expression_impl/all_symbols.md b/g3doc/api_docs/python/expression_impl/all_symbols.md deleted file mode 100644 index efd3051..0000000 --- a/g3doc/api_docs/python/expression_impl/all_symbols.md +++ /dev/null @@ -1,67 +0,0 @@ -# All symbols in Struct2Tensor-expression_impl - - - -## Primary symbols -* expression_impl -* expression_impl.apply_schema -* expression_impl.apply_schema.apply_schema -* expression_impl.broadcast -* expression_impl.broadcast.broadcast -* expression_impl.broadcast.broadcast_anonymous -* expression_impl.depth_limit -* 
expression_impl.depth_limit.limit_depth -* expression_impl.filter_expression -* expression_impl.filter_expression.filter_by_child -* expression_impl.filter_expression.filter_by_sibling -* expression_impl.index -* expression_impl.index.get_index_from_end -* expression_impl.index.get_positional_index -* expression_impl.map_prensor -* expression_impl.map_prensor.map_ragged_tensor -* expression_impl.map_prensor.map_sparse_tensor -* expression_impl.map_prensor_to_prensor -* expression_impl.map_prensor_to_prensor.Schema -* expression_impl.map_prensor_to_prensor.create_schema -* expression_impl.map_prensor_to_prensor.map_prensor_to_prensor -* expression_impl.map_values -* expression_impl.map_values.map_many_values -* expression_impl.map_values.map_values -* expression_impl.map_values.map_values_anonymous -* expression_impl.parquet -* expression_impl.parquet.ParquetDataset -* expression_impl.parquet.calculate_parquet_values -* expression_impl.parquet.create_expression_from_parquet_file -* expression_impl.placeholder -* expression_impl.placeholder.create_expression_from_schema -* expression_impl.placeholder.get_placeholder_paths_from_graph -* expression_impl.project -* expression_impl.project.project -* expression_impl.promote -* expression_impl.promote.PromoteChildExpression -* expression_impl.promote.PromoteExpression -* expression_impl.promote.promote -* expression_impl.promote.promote_anonymous -* expression_impl.promote_and_broadcast -* expression_impl.promote_and_broadcast.promote_and_broadcast -* expression_impl.promote_and_broadcast.promote_and_broadcast_anonymous -* expression_impl.proto -* expression_impl.proto.DescriptorPool -* expression_impl.proto.FileDescriptorSet -* expression_impl.proto.ProtoExpression -* expression_impl.proto.TransformFn -* expression_impl.proto.create_expression_from_file_descriptor_set -* expression_impl.proto.create_expression_from_proto -* expression_impl.proto.create_transformed_field -* expression_impl.proto.is_proto_expression -* expression_impl.reroot -* expression_impl.reroot.create_proto_index_field -* expression_impl.reroot.reroot -* expression_impl.size -* expression_impl.size.SizeExpression -* expression_impl.size.has -* expression_impl.size.size -* expression_impl.size.size_anonymous -* expression_impl.slice_expression -* expression_impl.slice_expression.IndexValue -* expression_impl.slice_expression.slice_expression \ No newline at end of file diff --git a/g3doc/api_docs/python/expression_impl/apply_schema.md b/g3doc/api_docs/python/expression_impl/apply_schema.md deleted file mode 100644 index 3ae89cd..0000000 --- a/g3doc/api_docs/python/expression_impl/apply_schema.md +++ /dev/null @@ -1,60 +0,0 @@ -description: Apply a schema to an expression. - -
-# Module: expression_impl.apply_schema
-
-Apply a schema to an expression.
-
-A tensorflow metadata schema provides detailed information about the data:
-specifically, it presents domain information (e.g., not just integers, but
-integers between 0 and 10), and more detailed structural information (e.g.,
-this field occurs in at least 70% of its parents, and when it occurs, it
-shows up 5 to 7 times).
-
-Applying a schema attaches a tensorflow metadata schema to an expression:
-namely, it aligns the features in the schema with the expression's children by
-name (possibly recursively).
-
-After applying a schema to an expression, one can use promote, broadcast, et
-cetera, and the schema for new expressions will be inferred. If you write a
-custom expression, you can write code that determines the schema information of
-the result.
-
-To get the schema back, call get_schema().
-
-This does not filter out fields not in the schema.
-
-my_expr = ...
-my_schema = ...schema here...
-my_new_schema = my_expr.apply_schema(my_schema).get_schema()
-my_new_schema has semantically identical information on its fields to my_schema.
-
-1. Get the (non-deprecated) paths from a schema.
-2. Check if any paths in the schema are not in the expression.
-3. Check if any paths in the expression are not in the schema.
-4. Project the expression to paths in the schema.
-
-## Functions
-
-[`apply_schema(...)`](../expression_impl/apply_schema/apply_schema.md)
-
diff --git a/g3doc/api_docs/python/expression_impl/apply_schema/apply_schema.md b/g3doc/api_docs/python/expression_impl/apply_schema/apply_schema.md
deleted file mode 100644
index 8774e3c..0000000
--- a/g3doc/api_docs/python/expression_impl/apply_schema/apply_schema.md
+++ /dev/null
@@ -1,32 +0,0 @@
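A minimal sketch of the flow described above, assuming `my_expr` is an existing expression and `my_schema` a matching `schema_pb2.Schema` (both placeholders, in the doc's own `...` style):

```python
import struct2tensor as s2t
from tensorflow_metadata.proto.v0 import schema_pb2

# Placeholders: any expression, and a tensorflow-metadata schema whose
# (non-deprecated) feature names align with the expression's children.
my_expr = ...
my_schema = schema_pb2.Schema()

# Attach the schema; features are matched to children by name.
expr_with_schema = my_expr.apply_schema(my_schema)

# Derived expressions (promote, broadcast, ...) now infer their schema,
# and the schema can be read back at any point.
my_new_schema = expr_with_schema.get_schema()
```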
-# expression_impl.apply_schema.apply_schema
-
diff --git a/g3doc/api_docs/python/expression_impl/broadcast.md b/g3doc/api_docs/python/expression_impl/broadcast.md
deleted file mode 100644
index 5e7565c..0000000
--- a/g3doc/api_docs/python/expression_impl/broadcast.md
+++ /dev/null
@@ -1,98 +0,0 @@
-description: Methods for broadcasting a path in a tree.
-# Module: expression_impl.broadcast
-
-Methods for broadcasting a path in a tree.
-
-This provides methods for broadcasting a field anonymously (as used in
-promote_and_broadcast), or with an explicitly given name.
-
-Suppose you have an expr representing:
-
-```
-+
-|
-+-session* (stars indicate repeated)
-    |
-    +-event*
-    |
-    +-val*-int64
-
-session: {
-  event: {}
-  event: {}
-  val: 10
-  val: 11
-}
-session: {
-  event: {}
-  event: {}
-  val: 20
-}
-```
-
-#### Then:
-
-```
-broadcast.broadcast(expr, path.Path(["session","val"]), "event", "nv")
-```
-
-becomes:
-
-```
-+
-|
-+---session* (stars indicate repeated)
-    |
-    +-event*
-    |   |
-    |   +---nv*-int64
-    |
-    +-val*-int64
-
-session: {
-  event: {
-    nv: 10
-    nv: 11
-  }
-  event: {
-    nv: 10
-    nv: 11
-  }
-  val: 10
-  val: 11
-}
-session: {
-  event: {nv: 20}
-  event: {nv: 20}
-  val: 20
-}
-```
-
-## Functions
-
-[`broadcast(...)`](../expression_impl/broadcast/broadcast.md)
-
-[`broadcast_anonymous(...)`](../expression_impl/broadcast/broadcast_anonymous.md)
-
diff --git a/g3doc/api_docs/python/expression_impl/broadcast/broadcast.md b/g3doc/api_docs/python/expression_impl/broadcast/broadcast.md
deleted file mode 100644
index dd1285f..0000000
--- a/g3doc/api_docs/python/expression_impl/broadcast/broadcast.md
+++ /dev/null
@@ -1,34 +0,0 @@
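The example above, written out as a hedged sketch (`expr` is a placeholder for an expression shaped like the session/event tree):

```python
from struct2tensor import path
from struct2tensor.expression_impl import broadcast

# Placeholder: expression with repeated session.event and session.val.
expr = ...

# Copy each session's `val` down into every sibling `event`, as field "nv".
new_root = broadcast.broadcast(
    expr, path.Path(["session", "val"]), "event", "nv")
```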
-# expression_impl.broadcast.broadcast
-
diff --git a/g3doc/api_docs/python/expression_impl/broadcast/broadcast_anonymous.md b/g3doc/api_docs/python/expression_impl/broadcast/broadcast_anonymous.md
deleted file mode 100644
index a18b94b..0000000
--- a/g3doc/api_docs/python/expression_impl/broadcast/broadcast_anonymous.md
+++ /dev/null
@@ -1,33 +0,0 @@
-# expression_impl.broadcast.broadcast_anonymous
-
diff --git a/g3doc/api_docs/python/expression_impl/depth_limit.md b/g3doc/api_docs/python/expression_impl/depth_limit.md
deleted file mode 100644
index 7ba27d2..0000000
--- a/g3doc/api_docs/python/expression_impl/depth_limit.md
+++ /dev/null
@@ -1,52 +0,0 @@
-description: Caps the depth of an expression.
-# Module: expression_impl.depth_limit
-
-Caps the depth of an expression.
-
-Suppose you have an expression expr modeled as:
-
-```
-  *
-   \
-    A
-   / \
-  D   B
-       \
-        C
-```
-
-If expr_2 = depth_limit.limit_depth(expr, 2), you get:
-
-```
-  *
-   \
-    A
-   / \
-  D   B
-```
-
-## Functions
-
-[`limit_depth(...)`](../expression_impl/depth_limit/limit_depth.md): Limit the depth to nodes k steps from expr.
-
diff --git a/g3doc/api_docs/python/expression_impl/depth_limit/limit_depth.md b/g3doc/api_docs/python/expression_impl/depth_limit/limit_depth.md
deleted file mode 100644
index 4b46a53..0000000
--- a/g3doc/api_docs/python/expression_impl/depth_limit/limit_depth.md
+++ /dev/null
@@ -1,34 +0,0 @@
-description: Limit the depth to nodes k steps from expr.
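A sketch of the call above (`expr` is a placeholder for the A/B/C/D tree):

```python
from struct2tensor.expression_impl import depth_limit

# Placeholder: the expression sketched above.
expr = ...

# Keep nodes at most 2 steps from the root: A, B and D survive; C is dropped.
expr_2 = depth_limit.limit_depth(expr, 2)
```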
-# expression_impl.depth_limit.limit_depth
-
-Limit the depth to nodes k steps from expr.
-
diff --git a/g3doc/api_docs/python/expression_impl/filter_expression.md b/g3doc/api_docs/python/expression_impl/filter_expression.md
deleted file mode 100644
index d9018a6..0000000
--- a/g3doc/api_docs/python/expression_impl/filter_expression.md
+++ /dev/null
@@ -1,76 +0,0 @@
-description: Create a new expression that is a filtered version of an original one.
-# Module: expression_impl.filter_expression
-
-Create a new expression that is a filtered version of an original one.
-
-There are two public methods in this module: filter_by_sibling and
-filter_by_child. As with most other operations, these create a new tree which
-has all the original paths of the original tree, but with a new subtree.
-
-filter_by_sibling allows you to filter an expression by a boolean sibling field.
-
-Beginning with the struct:
-
-```
-root =
-      -----*----------------------------------------------------
-     /                         \                                 \
- root0                     root1-----------------------      root2 (empty)
-  /  \                     /     \              \      \
-  |  keep_my_sib0:False   |  keep_my_sib1:True   |  keep_my_sib2:False
- doc0-----                doc1---------------    doc2--------
-  |       \                \        \       \               \
- bar:"a"  keep_me:False    bar:"b"  bar:"c"  keep_me:True   bar:"d"
-
-# Note, keep_my_sib and doc must have the same shape (e.g., each root
-has the same number of keep_my_sib children as doc children).
-root_2 = filter_expression.filter_by_sibling(
-    root, path.create_path("doc"), "keep_my_sib", "new_doc")
-
-End with the struct (suppressing original doc):
-
-      -----*----------------------------------------------------
-     /                         \                                 \
- root0                     root1------------------           root2 (empty)
-     \                     /     \                \
-     keep_my_sib0:False   |   keep_my_sib1:True   keep_my_sib2:False
-                          new_doc0-----------
-                            \        \       \
-                            bar:"b"  bar:"c"  keep_me:True
-```
-
-filter_by_child allows you to filter an expression by an optional boolean
-child field.
-
-The following call will have the same effect as above:
-
-```
-root_2 = filter_expression.filter_by_child(
-    root, path.create_path("doc"), "keep_me", "new_doc")
-```
-
-## Functions
-
-[`filter_by_child(...)`](../expression_impl/filter_expression/filter_by_child.md): Filter an expression by an optional boolean child field.
-
-[`filter_by_sibling(...)`](../expression_impl/filter_expression/filter_by_sibling.md): Filter an expression by its sibling.
-
diff --git a/g3doc/api_docs/python/expression_impl/filter_expression/filter_by_child.md b/g3doc/api_docs/python/expression_impl/filter_expression/filter_by_child.md
deleted file mode 100644
index 5427623..0000000
--- a/g3doc/api_docs/python/expression_impl/filter_expression/filter_by_child.md
+++ /dev/null
@@ -1,90 +0,0 @@
-description: Filter an expression by an optional boolean child field.
-# expression_impl.filter_expression.filter_by_child
-
-Filter an expression by an optional boolean child field.
-
-If the child field is present and True, then keep that parent.
-Otherwise, drop the parent.
-
-#### Args:
-
-*   `expr`: the original expression.
-*   `p`: the path to filter.
-*   `child_field_name`: the boolean child field to use to filter.
-*   `new_field_name`: the new, filtered version of path.
-
-#### Returns:
-
-The new root expression.
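A hedged sketch of the call (`root` is a placeholder, following the module overview's tree):

```python
from struct2tensor import path
from struct2tensor.expression_impl import filter_expression

# Placeholder: expression with repeated "doc" children carrying an
# optional boolean "keep_me" leaf, as in the module overview.
root = ...

# Keep a doc only when its "keep_me" child is present and True; the
# surviving docs appear under the new name "new_doc".
root_2 = filter_expression.filter_by_child(
    root, path.create_path("doc"), "keep_me", "new_doc")
```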
diff --git a/g3doc/api_docs/python/expression_impl/filter_expression/filter_by_sibling.md b/g3doc/api_docs/python/expression_impl/filter_expression/filter_by_sibling.md
deleted file mode 100644
index 1e3808d..0000000
--- a/g3doc/api_docs/python/expression_impl/filter_expression/filter_by_sibling.md
+++ /dev/null
@@ -1,92 +0,0 @@
-description: Filter an expression by its sibling.
-# expression_impl.filter_expression.filter_by_sibling
-
-Filter an expression by its sibling.
-
-This is similar to boolean_mask. The shape of the path being filtered and
-the sibling must be identical (e.g., each parent object must have an
-equal number of source and sibling children).
-
-#### Args:
-
-*   `expr`: the root expression.
-*   `p`: a path to the source to be filtered.
-*   `sibling_field_name`: the sibling to use as a mask.
-*   `new_field_name`: a new sibling to create.
-
-#### Returns:
-
-a new root.
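A matching sketch for the sibling form (`root` again a placeholder):

```python
from struct2tensor import path
from struct2tensor.expression_impl import filter_expression

# Placeholder: expression with repeated "doc" children and a boolean
# sibling "keep_my_sib" of identical shape, as in the module overview.
root = ...

# Mask each doc with its keep_my_sib sibling; kept docs become "new_doc".
root_2 = filter_expression.filter_by_sibling(
    root, path.create_path("doc"), "keep_my_sib", "new_doc")
```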
diff --git a/g3doc/api_docs/python/expression_impl/index.md b/g3doc/api_docs/python/expression_impl/index.md
deleted file mode 100644
index 822383a..0000000
--- a/g3doc/api_docs/python/expression_impl/index.md
+++ /dev/null
@@ -1,136 +0,0 @@
-description: get_positional_index and get_index_from_end methods.
-# Module: expression_impl.index
-
-get_positional_index and get_index_from_end methods.
-
-The parent_index identifies the index of the parent of each element. These
-methods take the parent_index to determine the relationship with respect to
-other elements.
-
-#### Given:
-
-```
-session: {
-  event: {
-    val: 111
-  }
-  event: {
-    val: 121
-    val: 122
-  }
-}
-
-session: {
-  event: {
-    val: 10
-    val: 7
-  }
-  event: {
-    val: 1
-  }
-}
-```
-
-```
-get_positional_index(expr, path.Path(["event","val"]), "val_index")
-```
-
-yields:
-
-```
-session: {
-  event: {
-    val: 111
-    val_index: 0
-  }
-  event: {
-    val: 121
-    val: 122
-    val_index: 0
-    val_index: 1
-  }
-}
-
-session: {
-  event: {
-    val: 10
-    val: 7
-    val_index: 0
-    val_index: 1
-  }
-  event: {
-    val: 1
-    val_index: 0
-  }
-}
-```
-
-```
-get_index_from_end(expr, path.Path(["event","val"]), "neg_val_index")
-```
-
-yields:
-
-```
-session: {
-  event: {
-    val: 111
-    neg_val_index: -1
-  }
-  event: {
-    val: 121
-    val: 122
-    neg_val_index: -2
-    neg_val_index: -1
-  }
-}
-
-session: {
-  event: {
-    val: 10
-    val: 7
-    neg_val_index: -2
-    neg_val_index: -1
-  }
-  event: {
-    val: 1
-    neg_val_index: -1
-  }
-}
-```
-
-These methods are useful when you want to depend upon the index of a field.
-For example, if you want to filter examples based upon their index, or
-cogroup two fields by index, then first creating the index is useful.
-
-Note that while the parent indices of these fields seem like overhead, they
-are just references to the parent indices of other fields, and therefore
-take little memory or CPU.
-
-## Functions
-
-[`get_index_from_end(...)`](../expression_impl/index/get_index_from_end.md): Gets the number of steps from the end of the array.
-
-[`get_positional_index(...)`](../expression_impl/index/get_positional_index.md): Gets the positional index.
-
diff --git a/g3doc/api_docs/python/expression_impl/index/get_index_from_end.md b/g3doc/api_docs/python/expression_impl/index/get_index_from_end.md
deleted file mode 100644
index 6ab17db..0000000
--- a/g3doc/api_docs/python/expression_impl/index/get_index_from_end.md
+++ /dev/null
@@ -1,82 +0,0 @@
-description: Gets the number of steps from the end of the array.
-# expression_impl.index.get_index_from_end
-
-Gets the number of steps from the end of the array.
-
-Given an array ["a", "b", "c"], with indices [0, 1, 2], the result of this
-is [-3,-2,-1].
-
-#### Args:
-
-*   `t`: original expression.
-*   `source_path`: path in expression to get index of.
-*   `new_field_name`: the name of the new field.
-
-#### Returns:
-
-The new expression and the new path as a pair.
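A brief sketch of the call (`expr` is a placeholder for an expression with a repeated leaf at event.val, as in the module overview):

```python
from struct2tensor import path
from struct2tensor.expression_impl import index

expr = ...  # placeholder expression

# Adds a sibling leaf counting backwards from the end: [-n, ..., -2, -1].
new_expr, new_path = index.get_index_from_end(
    expr, path.Path(["event", "val"]), "neg_val_index")
```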
diff --git a/g3doc/api_docs/python/expression_impl/index/get_positional_index.md b/g3doc/api_docs/python/expression_impl/index/get_positional_index.md
deleted file mode 100644
index 43b4e2b..0000000
--- a/g3doc/api_docs/python/expression_impl/index/get_positional_index.md
+++ /dev/null
@@ -1,82 +0,0 @@
-description: Gets the positional index.
-# expression_impl.index.get_positional_index
-
-Gets the positional index.
-
-Given a field with parent_index [0,1,1,2,3,4,4], this returns:
-parent_index [0,1,1,2,3,4,4] and value [0,0,1,0,0,0,1]
-
-#### Args:
-
-*   `expr`: original expression.
-*   `source_path`: path in expression to get index of.
-*   `new_field_name`: the name of the new field.
-
-#### Returns:
-
-The new expression and the new path as a pair.
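The forward-counting counterpart, as a sketch under the same assumptions:

```python
from struct2tensor import path
from struct2tensor.expression_impl import index

expr = ...  # placeholder expression with repeated event.val

# Adds a sibling leaf with each value's 0-based position among its siblings.
new_expr, new_path = index.get_positional_index(
    expr, path.Path(["event", "val"]), "val_index")
```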
diff --git a/g3doc/api_docs/python/expression_impl/map_prensor.md b/g3doc/api_docs/python/expression_impl/map_prensor.md
deleted file mode 100644
index 9806d8e..0000000
--- a/g3doc/api_docs/python/expression_impl/map_prensor.md
+++ /dev/null
@@ -1,117 +0,0 @@
-description: Arbitrary operations from sparse and ragged tensors to a leaf field.
-# Module: expression_impl.map_prensor
-
-Arbitrary operations from sparse and ragged tensors to a leaf field.
-
-There are two public methods of note right now: map_sparse_tensor
-and map_ragged_tensor.
-
-#### Assume expr is:
-
-```
-session: {
-  event: {
-    val_a: 10
-    val_b: 1
-  }
-  event: {
-    val_a: 20
-    val_b: 2
-  }
-  event: {
-  }
-  event: {
-    val_a: 40
-  }
-  event: {
-    val_b: 5
-  }
-}
-```
-
-Either of the following alternatives will add val_a and val_b
-to create val_sum.
-
-map_sparse_tensor converts val_a and val_b to sparse tensors,
-and then adds them to produce val_sum.
-
-```
-new_root = map_prensor.map_sparse_tensor(
-    expr,
-    path.Path(["event"]),
-    [path.Path(["val_a"]), path.Path(["val_b"])],
-    lambda x,y: x + y,
-    False,
-    tf.int32,
-    "val_sum")
-```
-
-map_ragged_tensor converts val_a and val_b to ragged tensors,
-and then adds them to produce val_sum.
-
-```
-new_root = map_prensor.map_ragged_tensor(
-    expr,
-    path.Path(["event"]),
-    [path.Path(["val_a"]), path.Path(["val_b"])],
-    lambda x,y: x + y,
-    False,
-    tf.int32,
-    "val_sum")
-```
-
-The result of either is:
-
-```
-session: {
-  event: {
-    val_a: 10
-    val_b: 1
-    val_sum: 11
-  }
-  event: {
-    val_a: 20
-    val_b: 2
-    val_sum: 22
-  }
-  event: {
-  }
-  event: {
-    val_a: 40
-    val_sum: 40
-  }
-  event: {
-    val_b: 5
-    val_sum: 5
-  }
-}
-```
-
-## Functions
-
-[`map_ragged_tensor(...)`](../expression_impl/map_prensor/map_ragged_tensor.md): Map a ragged tensor.
-
-[`map_sparse_tensor(...)`](../expression_impl/map_prensor/map_sparse_tensor.md): Maps a sparse tensor.
-
diff --git a/g3doc/api_docs/python/expression_impl/map_prensor/map_ragged_tensor.md b/g3doc/api_docs/python/expression_impl/map_prensor/map_ragged_tensor.md
deleted file mode 100644
index 9b12225..0000000
--- a/g3doc/api_docs/python/expression_impl/map_prensor/map_ragged_tensor.md
+++ /dev/null
@@ -1,115 +0,0 @@
-description: Map a ragged tensor.
-# expression_impl.map_prensor.map_ragged_tensor
-
-Map a ragged tensor.
-
-#### Args:
-
-*   `root`: the root of the expression.
-*   `root_path`: the path relative to which the ragged tensors are calculated.
-*   `paths`: the input paths relative to the root_path.
-*   `operation`: a method that takes the list of ragged tensors as input and
-    returns a ragged tensor.
-*   `is_repeated`: true if the result of operation is repeated.
-*   `dtype`: dtype of the result of the operation.
-*   `new_field_name`: root_path.get_child(new_field_name) is the path of the
-    result.
-
-#### Returns:
-
-A new root expression containing the old root expression plus the new path,
-root_path.get_child(new_field_name), with the result of the operation.
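A hedged sketch of a full call, including materializing the result (the expression is a placeholder in the doc's own `...` style; `calculate.calculate_prensors` is assumed to be the usual way to evaluate expressions):

```python
import tensorflow as tf
from struct2tensor import calculate, path
from struct2tensor.expression_impl import map_prensor, project

# Placeholder: expression with int64 leaves event.val_a and event.val_b.
expr = ...

new_root = map_prensor.map_ragged_tensor(
    expr,
    path.Path(["event"]),                          # root_path
    [path.Path(["val_a"]), path.Path(["val_b"])],  # paths, relative to root_path
    lambda x, y: x + y,                            # elementwise ragged add
    False,                                         # is_repeated
    tf.int32,                                      # dtype
    "val_sum")                                     # new_field_name

# Materialize just the new leaf.
projected = project.project(new_root, [path.Path(["event", "val_sum"])])
[prensor] = calculate.calculate_prensors([projected])
```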
diff --git a/g3doc/api_docs/python/expression_impl/map_prensor/map_sparse_tensor.md b/g3doc/api_docs/python/expression_impl/map_prensor/map_sparse_tensor.md
deleted file mode 100644
index 2f32e89..0000000
--- a/g3doc/api_docs/python/expression_impl/map_prensor/map_sparse_tensor.md
+++ /dev/null
@@ -1,115 +0,0 @@
-description: Maps a sparse tensor.
-# expression_impl.map_prensor.map_sparse_tensor
-
-Maps a sparse tensor.
-
-#### Args:
-
-*   `root`: the root of the expression.
-*   `root_path`: the path relative to which the sparse tensors are calculated.
-*   `paths`: the input paths relative to the root_path.
-*   `operation`: a method that takes the list of sparse tensors as input and
-    returns a sparse tensor.
-*   `is_repeated`: true if the result of operation is repeated.
-*   `dtype`: dtype of the result of the operation.
-*   `new_field_name`: root_path.get_child(new_field_name) is the path of the
-    result.
-
-#### Returns:
-
-A new root expression containing the old root expression plus the new path,
-root_path.get_child(new_field_name), with the result of the operation.
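The sparse variant, sketched under the same placeholder assumptions. Note that here the operation receives `tf.sparse.SparseTensor` inputs, so `tf.sparse.add` is used rather than the `+` operator:

```python
import tensorflow as tf
from struct2tensor import path
from struct2tensor.expression_impl import map_prensor

expr = ...  # placeholder: expression with leaves event.val_a / event.val_b

new_root = map_prensor.map_sparse_tensor(
    expr,
    path.Path(["event"]),
    [path.Path(["val_a"]), path.Path(["val_b"])],
    lambda x, y: tf.sparse.add(x, y),  # combine sparse tensors elementwise
    False,
    tf.int32,
    "val_sum")
```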
diff --git a/g3doc/api_docs/python/expression_impl/map_prensor_to_prensor.md b/g3doc/api_docs/python/expression_impl/map_prensor_to_prensor.md
deleted file mode 100644
index ce3164f..0000000
--- a/g3doc/api_docs/python/expression_impl/map_prensor_to_prensor.md
+++ /dev/null
@@ -1,89 +0,0 @@
-description: Arbitrary operations from prensors to prensors in an expression.
-# Module: expression_impl.map_prensor_to_prensor
-
-Arbitrary operations from prensors to prensors in an expression.
-
-This is useful if a single op generates an entire structure. In general, it is
-better to use the existing expressions framework or design a custom expression
-than use this op. So long as any of the output is required, all of the input
-is required.
-
-For example, suppose you have an op my_op, that takes a prensor of the form:
-
-```
-  event
-   / \
- foo   bar
-```
-
-and produces a prensor of the form my_result_schema:
-
-```
-   event
-    / \
- foo2   bar2
-```
-
-```
-my_result_schema = create_schema(
-    is_repeated=True,
-    children={"foo2": {"is_repeated": True, "dtype": tf.int64},
-              "bar2": {"is_repeated": False, "dtype": tf.int64}})
-```
-
-If you give it an expression original with the schema:
-
-```
- session
-    |
-  event
-   / \
- foo   bar
-
-result = map_prensor_to_prensor(
-    original,
-    path.Path(["session","event"]),
-    my_op,
-    my_result_schema)
-```
-
-Result will have the schema:
-
-```
- session
-    |
-  event--------
-   / \    \    \
- foo  bar  foo2 bar2
-```
-
-## Classes
-
-[`class Schema`](../expression_impl/map_prensor_to_prensor/Schema.md): A finite schema for a prensor.
-
-## Functions
-
-[`create_schema(...)`](../expression_impl/map_prensor_to_prensor/create_schema.md): Create a schema recursively.
-
-[`map_prensor_to_prensor(...)`](../expression_impl/map_prensor_to_prensor/map_prensor_to_prensor.md): Maps an expression to a prensor, and merges that prensor.
-
diff --git a/g3doc/api_docs/python/expression_impl/map_prensor_to_prensor/Schema.md b/g3doc/api_docs/python/expression_impl/map_prensor_to_prensor/Schema.md
deleted file mode 100644
index 065306b..0000000
--- a/g3doc/api_docs/python/expression_impl/map_prensor_to_prensor/Schema.md
+++ /dev/null
@@ -1,153 +0,0 @@
-description: A finite schema for a prensor.
-# expression_impl.map_prensor_to_prensor.Schema
-
-A finite schema for a prensor.
-
-Effectively, this stores everything for the prensor but the tensors
-themselves.
-
-Notice that this is slightly different from schema_pb2.Schema, although
-similar in nature. At present, there is no clear way to extract is_repeated
-and dtype from schema_pb2.Schema.
-
-See create_schema below for constructing a schema.
-
-Note that for LeafNodeTensor, dtype is not None.
-Also, for ChildNodeTensor and RootNodeTensor, dtype is None. However,
-a ChildNodeTensor or RootNodeTensor could be childless.
-
-#### Args:
-
-*   `is_repeated`: is the root repeated?
-*   `dtype`: tf.dtype of the root if the root is a leaf, otherwise None.
-*   `schema_feature`: schema_pb2.Feature of the root (no struct_domain
-    necessary).
-*   `children`: child schemas.
-
-#### Attributes:
-
-*   `is_repeated`
-*   `schema_feature`
-*   `type`
-## Methods
-
-### `get_child`
-
-View source
-
-### `known_field_names`
-
-View source
-
diff --git a/g3doc/api_docs/python/expression_impl/map_prensor_to_prensor/create_schema.md b/g3doc/api_docs/python/expression_impl/map_prensor_to_prensor/create_schema.md
deleted file mode 100644
index d249def..0000000
--- a/g3doc/api_docs/python/expression_impl/map_prensor_to_prensor/create_schema.md
+++ /dev/null
@@ -1,98 +0,0 @@
-description: Create a schema recursively.
-# expression_impl.map_prensor_to_prensor.create_schema
-
-Create a schema recursively.
-
-#### Example:
-
-my_result_schema = create_schema(
-    is_repeated=True,
-    children={"foo2": {"is_repeated": True, "dtype": tf.int64},
-              "bar2": {"is_repeated": False, "dtype": tf.int64}})
-
-#### Args:
-
-*   `is_repeated`: whether the root is repeated.
-*   `dtype`: the dtype of a leaf (None for non-leaves).
-*   `schema_feature`: the schema_pb2.Feature describing this expression. name
-    and struct_domain need not be specified.
-*   `children`: the child schemas. Note that the value type of children is
-    either a Schema or a dictionary of arguments to create_schema.
-
-#### Returns:
-
-a new Schema represented by the inputs.
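The example above, written as a runnable call (the child entries are dictionaries of create_schema arguments, per the Args note):

```python
import tensorflow as tf
from struct2tensor.expression_impl import map_prensor_to_prensor

# A repeated root with a repeated int64 leaf "foo2" and an optional
# int64 leaf "bar2".
my_result_schema = map_prensor_to_prensor.create_schema(
    is_repeated=True,
    children={"foo2": {"is_repeated": True, "dtype": tf.int64},
              "bar2": {"is_repeated": False, "dtype": tf.int64}})
```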
diff --git a/g3doc/api_docs/python/expression_impl/map_prensor_to_prensor/map_prensor_to_prensor.md b/g3doc/api_docs/python/expression_impl/map_prensor_to_prensor/map_prensor_to_prensor.md
deleted file mode 100644
index 9d6f5c5..0000000
--- a/g3doc/api_docs/python/expression_impl/map_prensor_to_prensor/map_prensor_to_prensor.md
+++ /dev/null
@@ -1,129 +0,0 @@
-description: Maps an expression to a prensor, and merges that prensor.
-# expression_impl.map_prensor_to_prensor.map_prensor_to_prensor
-
-Maps an expression to a prensor, and merges that prensor.
-
-For example, suppose you have an op my_op, that takes a prensor of the form:
-
-  event
-   / \
- foo   bar
-
-and produces a prensor of the form my_result_schema:
-
-   event
-    / \
- foo2   bar2
-
-If you give it an expression original with the schema:
-
- session
-    |
-  event
-   / \
- foo   bar
-
-result = map_prensor_to_prensor(
-    original,
-    path.Path(["session","event"]),
-    my_op,
-    my_result_schema)
-
-Result will have the schema:
-
- session
-    |
-  event--------
-   / \    \    \
- foo  bar  foo2 bar2
-
-#### Args:
-
-*   `root_expr`: the root expression.
-*   `source`: the path where the prensor op is applied.
-*   `paths_needed`: the paths needed for the op.
-*   `prensor_op`: the prensor op.
-*   `output_schema`: the output schema of the op.
-
-#### Returns:
-
-A new expression where the prensor is merged.
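A hedged sketch with keyword arguments taken from the table above (`original`, `my_op`, and `my_result_schema` are placeholders; see create_schema for building the schema):

```python
from struct2tensor import path
from struct2tensor.expression_impl import map_prensor_to_prensor

# Placeholders: an expression with session.event.{foo,bar}, an op mapping a
# foo/bar prensor to a foo2/bar2 prensor, and the schema of that output.
original, my_op, my_result_schema = ..., ..., ...

result = map_prensor_to_prensor.map_prensor_to_prensor(
    root_expr=original,
    source=path.Path(["session", "event"]),
    paths_needed=[path.Path(["foo"]), path.Path(["bar"])],
    prensor_op=my_op,
    output_schema=my_result_schema)
```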
diff --git a/g3doc/api_docs/python/expression_impl/map_values.md b/g3doc/api_docs/python/expression_impl/map_values.md
deleted file mode 100644
index 7553266..0000000
--- a/g3doc/api_docs/python/expression_impl/map_values.md
+++ /dev/null
@@ -1,39 +0,0 @@
-description: Maps the values of various leaves of the same child to a single result.
-# Module: expression_impl.map_values
-
-Maps the values of various leaves of the same child to a single result.
-
-All inputs must have the same shape (parent_index must be equal).
-
-The output is given the same shape (the output of the function must be of
-equal length).
-
-Note that the operations are on 1-D tensors (as opposed to scalars).
-
-## Functions
-
-[`map_many_values(...)`](../expression_impl/map_values/map_many_values.md): Map multiple sibling fields into a new sibling.
-
-[`map_values(...)`](../expression_impl/map_values/map_values.md): Map field into a new sibling.
-
-[`map_values_anonymous(...)`](../expression_impl/map_values/map_values_anonymous.md): Map field into a new sibling.
-
diff --git a/g3doc/api_docs/python/expression_impl/map_values/map_many_values.md b/g3doc/api_docs/python/expression_impl/map_values/map_many_values.md
deleted file mode 100644
index 5ab7f1f..0000000
--- a/g3doc/api_docs/python/expression_impl/map_values/map_many_values.md
+++ /dev/null
@@ -1,106 +0,0 @@
-description: Map multiple sibling fields into a new sibling.
-# expression_impl.map_values.map_many_values
-
-Map multiple sibling fields into a new sibling.
-
-All source fields must have the same shape, and the shape of the output
-must be the same as well.
-
-#### Args:
-
-*   `root`: original root.
-*   `parent_path`: parent path of all sources and the new field.
-*   `source_fields`: source fields of the operation. Must have the same shape.
-*   `operation`: operation from source_fields to new field.
-*   `dtype`: type of new field.
-*   `new_field_name`: name of the new field.
-
-#### Returns:
-
-The new expression and the new path as a pair.
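A hedged sketch of the call (`expr` is a placeholder; passing the source fields by name under the parent path is an assumption based on the Args table):

```python
import tensorflow as tf
from struct2tensor import path
from struct2tensor.expression_impl import map_values

# Placeholder: expression with same-shaped int64 leaves event.val_a and
# event.val_b. The operation sees 1-D value tensors.
expr = ...

new_expr, new_path = map_values.map_many_values(
    expr, path.Path(["event"]), ["val_a", "val_b"],
    lambda x, y: x + y, tf.int64, "val_sum")
```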
diff --git a/g3doc/api_docs/python/expression_impl/map_values/map_values.md b/g3doc/api_docs/python/expression_impl/map_values/map_values.md
deleted file mode 100644
index 7b2f2c1..0000000
--- a/g3doc/api_docs/python/expression_impl/map_values/map_values.md
+++ /dev/null
@@ -1,97 +0,0 @@
-description: Map field into a new sibling.
-# expression_impl.map_values.map_values
-
-Map field into a new sibling.
-
-The shape of the output must be the same as the input.
-
-#### Args:
-
-*   `root`: original root.
-*   `source_path`: source of the operation.
-*   `operation`: operation from source_fields to new field.
-*   `dtype`: type of new field.
-*   `new_field_name`: name of the new field.
-
-#### Returns:
-
-The new expression.
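The single-source form, sketched with a placeholder expression:

```python
import tensorflow as tf
from struct2tensor import path
from struct2tensor.expression_impl import map_values

expr = ...  # placeholder: expression with an int64 leaf at event.val

# Double each value; the result becomes a new sibling "val_doubled" with
# the same shape as the source.
new_expr = map_values.map_values(
    expr, path.Path(["event", "val"]), lambda x: x * 2,
    tf.int64, "val_doubled")
```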
diff --git a/g3doc/api_docs/python/expression_impl/map_values/map_values_anonymous.md b/g3doc/api_docs/python/expression_impl/map_values/map_values_anonymous.md
deleted file mode 100644
index 6a8e712..0000000
--- a/g3doc/api_docs/python/expression_impl/map_values/map_values_anonymous.md
+++ /dev/null
@@ -1,89 +0,0 @@
-description: Map field into a new sibling.
-# expression_impl.map_values.map_values_anonymous
-
-Map field into a new sibling.
-
-The shape of the output must be the same as the input.
-
-#### Args:
-
-*   `root`: original root.
-*   `source_path`: source of the operation.
-*   `operation`: operation from source_fields to new field.
-*   `dtype`: type of new field.
-
-#### Returns:
-
-The new expression and the new path as a pair.
diff --git a/g3doc/api_docs/python/expression_impl/parquet.md b/g3doc/api_docs/python/expression_impl/parquet.md
deleted file mode 100644
index 2a3cf40..0000000
--- a/g3doc/api_docs/python/expression_impl/parquet.md
+++ /dev/null
@@ -1,50 +0,0 @@
-description: Apache Parquet Dataset.
-# Module: expression_impl.parquet
-
-Apache Parquet Dataset.
-
-#### Example usage:
-
-```
-exp = create_expression_from_parquet_file(filenames)
-docid_project_exp = project.project(exp, [path.Path(["DocId"])])
-pqds = parquet_dataset.calculate_parquet_values([docid_project_exp], exp,
-                                                filenames, batch_size)
-
-for prensors in pqds:
-  doc_id_prensor = prensors[0]
-```
-
-## Classes
-
-[`class ParquetDataset`](../expression_impl/parquet/ParquetDataset.md): A dataset which reads columns from a parquet file and returns a prensor.
-
-## Functions
-
-[`calculate_parquet_values(...)`](../expression_impl/parquet/calculate_parquet_values.md): Calculates expressions and returns a parquet dataset.
-
-[`create_expression_from_parquet_file(...)`](../expression_impl/parquet/create_expression_from_parquet_file.md): Creates a placeholder expression from a parquet file.
-
diff --git a/g3doc/api_docs/python/expression_impl/parquet/ParquetDataset.md b/g3doc/api_docs/python/expression_impl/parquet/ParquetDataset.md
deleted file mode 100644
index b8261b6..0000000
--- a/g3doc/api_docs/python/expression_impl/parquet/ParquetDataset.md
+++ /dev/null
@@ -1,3455 +0,0 @@
-description: A dataset which reads columns from a parquet file and returns a prensor.
-# expression_impl.parquet.ParquetDataset
-
-A dataset which reads columns from a parquet file and returns a prensor.
-
-The prensor will have a PrensorTypeSpec, which is created based on
-value_paths.
-
-Note: In tensorflow v1 this dataset will not return a prensor. The output will
-be the same format as _RawParquetDataset's output (a vector of tensors).
-The following is a workaround in v1:
-
-```
-pq_ds = ParquetDataset(...)
-type_spec = pq_ds.element_spec
-tensors = pq_ds.make_one_shot_iterator().get_next()
-prensor = type_spec.from_components(tensors)
-session.run(prensor)
-```
-
-#### Args:
-
-*   `filenames`: A list containing the name(s) of the file(s) to be read.
-*   `value_paths`: A list of strings, each the dotstring path of a leaf to
-    read.
-*   `batch_size`: An int that determines how many messages are parsed into one
-    prensor tree in an iteration. If there are fewer than batch_size
-    remaining messages, then all remaining messages will be returned.
-
-#### Raises:
-
-*   `ValueError`: if the column does not exist in the parquet schema.
-
-#### Attributes:
-
-*   `element_spec`: The type specification of an element of this dataset.
-
-    ```
-    >>> dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])
-    >>> dataset.element_spec
-    TensorSpec(shape=(), dtype=tf.int32, name=None)
-    ```
-
-    For more information, read
-    [this guide](https://www.tensorflow.org/guide/data#dataset_structure).
-
-*   `output_classes`: Returns the class of each component of an element of
-    this dataset. (deprecated)
-
-    Warning: THIS FUNCTION IS DEPRECATED. It will be removed in a future
-    version. Instructions for updating: Use
-    `tf.compat.v1.data.get_output_classes(dataset)`.
-
-*   `output_shapes`: Returns the shape of each component of an element of
-    this dataset. (deprecated)
-
-    Warning: THIS FUNCTION IS DEPRECATED. It will be removed in a future
-    version. Instructions for updating: Use
-    `tf.compat.v1.data.get_output_shapes(dataset)`.
-
-*   `output_types`: Returns the type of each component of an element of this
-    dataset. (deprecated)
-
-    Warning: THIS FUNCTION IS DEPRECATED. It will be removed in a future
-    version. Instructions for updating: Use
-    `tf.compat.v1.data.get_output_types(dataset)`.
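A minimal sketch of constructing and iterating the dataset in TF2 eager mode (the file name and column are hypothetical):

```python
from struct2tensor.expression_impl.parquet import ParquetDataset

# Hypothetical Parquet file containing a "DocId" column.
filenames = ["/tmp/dremel_example.parquet"]

pq_ds = ParquetDataset(filenames=filenames,
                       value_paths=["DocId"],
                       batch_size=16)

for pren in pq_ds:
    # Each element is a prensor holding up to 16 parsed messages.
    print(pren)
```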
-## Methods
-
-### `apply`
-
-Applies a transformation function to this dataset.
-
-`apply` enables chaining of custom `Dataset` transformations, which are
-represented as functions that take one `Dataset` argument and return a
-transformed `Dataset`.
-
-```
->>> dataset = tf.data.Dataset.range(100)
->>> def dataset_fn(ds):
-...   return ds.filter(lambda x: x < 5)
->>> dataset = dataset.apply(dataset_fn)
->>> list(dataset.as_numpy_iterator())
-[0, 1, 2, 3, 4]
-```
-
-#### Args:
-
-*   `transformation_func`: A function that takes one `Dataset` argument and
-    returns a `Dataset`.
-
-#### Returns:
-
-*   `Dataset`: The `Dataset` returned by applying `transformation_func` to
-    this dataset.
-
-### `as_numpy_iterator`
-
-Returns an iterator which converts all elements of the dataset to numpy.
-
-Use `as_numpy_iterator` to inspect the content of your dataset. To see
-element shapes and types, print dataset elements directly instead of using
-`as_numpy_iterator`.
-
-```
->>> dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])
->>> for element in dataset:
-...   print(element)
-tf.Tensor(1, shape=(), dtype=int32)
-tf.Tensor(2, shape=(), dtype=int32)
-tf.Tensor(3, shape=(), dtype=int32)
-```
-
-This method requires that you are running in eager mode and the dataset's
-element_spec contains only `TensorSpec` components.
-
-```
->>> dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])
->>> for element in dataset.as_numpy_iterator():
-...   print(element)
-1
-2
-3
-```
-
-```
->>> dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])
->>> print(list(dataset.as_numpy_iterator()))
-[1, 2, 3]
-```
-
-`as_numpy_iterator()` will preserve the nested structure of dataset
-elements.
-
-```
->>> dataset = tf.data.Dataset.from_tensor_slices({'a': ([1, 2], [3, 4]),
-...                                               'b': [5, 6]})
->>> list(dataset.as_numpy_iterator()) == [{'a': (1, 3), 'b': 5},
-...                                       {'a': (2, 4), 'b': 6}]
-True
-```
-
-#### Returns:
-
-An iterable over the elements of the dataset, with their tensors converted
-to numpy arrays.
-
-#### Raises:
-
-*   `TypeError`: if an element contains a non-`Tensor` value.
-*   `RuntimeError`: if eager execution is not enabled.
-
-### `batch`
-
-Combines consecutive elements of this dataset into batches.
-
-```
->>> dataset = tf.data.Dataset.range(8)
->>> dataset = dataset.batch(3)
->>> list(dataset.as_numpy_iterator())
-[array([0, 1, 2]), array([3, 4, 5]), array([6, 7])]
-```
-
-```
->>> dataset = tf.data.Dataset.range(8)
->>> dataset = dataset.batch(3, drop_remainder=True)
->>> list(dataset.as_numpy_iterator())
-[array([0, 1, 2]), array([3, 4, 5])]
-```
-
-The components of the resulting element will have an additional outer
-dimension, which will be `batch_size` (or `N % batch_size` for the last
-element if `batch_size` does not divide the number of input elements `N`
-evenly and `drop_remainder` is `False`). If your program depends on the
-batches having the same outer dimension, you should set the `drop_remainder`
-argument to `True` to prevent the smaller batch from being produced.
-
-#### Args:
-
-*   `batch_size`: A `tf.int64` scalar `tf.Tensor`, representing the number of
-    consecutive elements of this dataset to combine in a single batch.
-*   `drop_remainder`: (Optional.) A `tf.bool` scalar `tf.Tensor`, representing
-    whether the last batch should be dropped in the case it has fewer than
-    `batch_size` elements; the default behavior is not to drop the smaller
-    batch.
-*   `num_parallel_calls`: (Optional.) A `tf.int64` scalar `tf.Tensor`,
-    representing the number of batches to compute asynchronously in parallel.
-    If not specified, batches will be computed sequentially. If the value
-    `tf.data.AUTOTUNE` is used, then the number of parallel calls is set
-    dynamically based on available resources.
-*   `deterministic`: (Optional.) When `num_parallel_calls` is specified, if
-    this boolean is specified (`True` or `False`), it controls the order in
-    which the transformation produces elements. If set to `False`, the
-    transformation is allowed to yield elements out of order to trade
-    determinism for performance. If not specified, the
-    `tf.data.Options.experimental_deterministic` option (`True` by default)
-    controls the behavior.
-
-#### Returns:
-
-*   `Dataset`: A `Dataset`.
-
-### `cache`
-
-Caches the elements in this dataset.
-
-The first time the dataset is iterated over, its elements will be cached
-either in the specified file or in memory. Subsequent iterations will
-use the cached data.
-
-Note: For the cache to be finalized, the input dataset must be iterated
-through in its entirety. Otherwise, subsequent iterations will not use
-cached data.
-
-```
->>> dataset = tf.data.Dataset.range(5)
->>> dataset = dataset.map(lambda x: x**2)
->>> dataset = dataset.cache()
->>> # The first time reading through the data will generate the data using
->>> # `range` and `map`.
->>> list(dataset.as_numpy_iterator())
-[0, 1, 4, 9, 16]
->>> # Subsequent iterations read from the cache.
->>> list(dataset.as_numpy_iterator())
-[0, 1, 4, 9, 16]
-```
-
-When caching to a file, the cached data will persist across runs. Even the
-first iteration through the data will read from the cache file. Changing
-the input pipeline before the call to `.cache()` will have no effect until
-the cache file is removed or the filename is changed.
-
-```python
-dataset = tf.data.Dataset.range(5)
-dataset = dataset.cache("/path/to/file")
-list(dataset.as_numpy_iterator())
-# [0, 1, 2, 3, 4]
-dataset = tf.data.Dataset.range(10)
-dataset = dataset.cache("/path/to/file")  # Same file!
-list(dataset.as_numpy_iterator())
-# [0, 1, 2, 3, 4]
-```
-
-Note: `cache` will produce exactly the same elements during each iteration
-through the dataset. If you wish to randomize the iteration order, make sure
-to call `shuffle` *after* calling `cache`.
-
-#### Args:
-
-*   `filename`: A `tf.string` scalar `tf.Tensor`, representing the name of a
-    directory on the filesystem to use for caching elements in this Dataset.
-    If a filename is not provided, the dataset will be cached in memory.
-
-#### Returns:
-
-*   `Dataset`: A `Dataset`.
-
-### `cardinality`
-
-Returns the cardinality of the dataset, if known.
-
-`cardinality` may return `tf.data.INFINITE_CARDINALITY` if the dataset
-contains an infinite number of elements or `tf.data.UNKNOWN_CARDINALITY` if
-the analysis fails to determine the number of elements in the dataset
-(e.g. when the dataset source is a file).
-
-```
->>> dataset = tf.data.Dataset.range(42)
->>> print(dataset.cardinality().numpy())
-42
->>> dataset = dataset.repeat()
->>> cardinality = dataset.cardinality()
->>> print((cardinality == tf.data.INFINITE_CARDINALITY).numpy())
-True
->>> dataset = dataset.filter(lambda x: True)
->>> cardinality = dataset.cardinality()
->>> print((cardinality == tf.data.UNKNOWN_CARDINALITY).numpy())
-True
-```
-
-#### Returns:
-
-A scalar `tf.int64` `Tensor` representing the cardinality of the dataset.
-If the cardinality is infinite or unknown, `cardinality` returns the
-named constants `tf.data.INFINITE_CARDINALITY` and
-`tf.data.UNKNOWN_CARDINALITY` respectively.
-
-### `concatenate`
-
-Creates a `Dataset` by concatenating the given dataset with this dataset.
-
-```
->>> a = tf.data.Dataset.range(1, 4)  # ==> [ 1, 2, 3 ]
->>> b = tf.data.Dataset.range(4, 8)  # ==> [ 4, 5, 6, 7 ]
->>> ds = a.concatenate(b)
->>> list(ds.as_numpy_iterator())
-[1, 2, 3, 4, 5, 6, 7]
->>> # The input dataset and dataset to be concatenated should have
->>> # compatible element specs.
->>> c = tf.data.Dataset.zip((a, b))
->>> a.concatenate(c)
-Traceback (most recent call last):
-TypeError: Two datasets to concatenate have different types
-<dtype: 'int64'> and (tf.int64, tf.int64)
->>> d = tf.data.Dataset.from_tensor_slices(["a", "b", "c"])
->>> a.concatenate(d)
-Traceback (most recent call last):
-TypeError: Two datasets to concatenate have different types
-<dtype: 'int64'> and <dtype: 'string'>
-```
-
-#### Args:
-
-*   `dataset`: `Dataset` to be concatenated.
-
-#### Returns:
-
-*   `Dataset`: A `Dataset`.
-
-### `enumerate`
-
-Enumerates the elements of this dataset.
-
-It is similar to python's `enumerate`.
-
-```
->>> dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])
->>> dataset = dataset.enumerate(start=5)
->>> for element in dataset.as_numpy_iterator():
-...   print(element)
-(5, 1)
-(6, 2)
-(7, 3)
-```
-
-```
->>> # The (nested) structure of the input dataset determines the
->>> # structure of elements in the resulting dataset.
->>> dataset = tf.data.Dataset.from_tensor_slices([(7, 8), (9, 10)])
->>> dataset = dataset.enumerate()
->>> for element in dataset.as_numpy_iterator():
-...   print(element)
-(0, array([7, 8], dtype=int32))
-(1, array([ 9, 10], dtype=int32))
-```
-
Args
-`start` - -A `tf.int64` scalar `tf.Tensor`, representing the start value for -enumeration. -
- - - - - - - - - - - - -
Returns
-`Dataset` - -A `Dataset`. -
- - - -

filter

- - - -Filters this dataset according to `predicate`. - -``` ->>> dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3]) ->>> dataset = dataset.filter(lambda x: x < 3) ->>> list(dataset.as_numpy_iterator()) -[1, 2] ->>> # `tf.math.equal(x, y)` is required for equality comparison ->>> def filter_fn(x): -... return tf.math.equal(x, 1) ->>> dataset = dataset.filter(filter_fn) ->>> list(dataset.as_numpy_iterator()) -[1] -``` - - - - - - - - - - -
Args
-`predicate` - -A function mapping a dataset element to a boolean. -
- - - - - - - - - - - - -
Returns
-`Dataset` - -The `Dataset` containing the elements of this dataset for which -`predicate` is `True`. -
- - - -

filter_with_legacy_function

-
-
-
-Filters this dataset according to `predicate`. (deprecated)
-
-Warning: THIS FUNCTION IS DEPRECATED. It will be removed in a future version.
-Instructions for updating:
-Use `tf.data.Dataset.filter()`.
-
-Note: This is an escape hatch for existing uses of `filter` that do not work
-with V2 functions. New uses are strongly discouraged and existing uses
-should migrate to `filter` as this method will be removed in V2.
-
-
-
-
-
-
Args
-`predicate` - -A function mapping a (nested) structure of tensors (having -shapes and types defined by `self.output_shapes` and -`self.output_types`) to a scalar `tf.bool` tensor. -
- - - - - - - - - - - - -
Returns
-`Dataset` - -The `Dataset` containing the elements of this dataset for which -`predicate` is `True`. -
- - - -

flat_map

- - - -Maps `map_func` across this dataset and flattens the result. - -Use `flat_map` if you want to make sure that the order of your dataset -stays the same. For example, to flatten a dataset of batches into a -dataset of their elements: - -``` ->>> dataset = tf.data.Dataset.from_tensor_slices( -... [[1, 2, 3], [4, 5, 6], [7, 8, 9]]) ->>> dataset = dataset.flat_map(lambda x: Dataset.from_tensor_slices(x)) ->>> list(dataset.as_numpy_iterator()) -[1, 2, 3, 4, 5, 6, 7, 8, 9] -``` - -`tf.data.Dataset.interleave()` is a generalization of `flat_map`, since -`flat_map` produces the same output as -`tf.data.Dataset.interleave(cycle_length=1)` - - - - - - - - - - -
Args
-`map_func` - -A function mapping a dataset element to a dataset. -
- - - - - - - - - - - - -
Returns
-`Dataset` - -A `Dataset`. -
- - - -

from_generator

-
-
-
-Creates a `Dataset` whose elements are generated by `generator`. (deprecated arguments)
-
-Warning: SOME ARGUMENTS ARE DEPRECATED: `(output_shapes, output_types)`. They will be removed in a future version.
-Instructions for updating:
-Use the `output_signature` argument instead.
-
-The `generator` argument must be a callable object that returns
-an object that supports the `iter()` protocol (e.g. a generator function).
-
-The elements generated by `generator` must be compatible with either the
-given `output_signature` argument or with the given `output_types` and
-(optionally) `output_shapes` arguments, whichever was specified.
-
-The recommended way to call `from_generator` is to use the
-`output_signature` argument. In this case the output will be assumed to
-consist of objects with the classes, shapes and types defined by
-`tf.TypeSpec` objects from the `output_signature` argument:
-
-```
->>> def gen():
-...   ragged_tensor = tf.ragged.constant([[1, 2], [3]])
-...   yield 42, ragged_tensor
->>>
->>> dataset = tf.data.Dataset.from_generator(
-...      gen,
-...      output_signature=(
-...          tf.TensorSpec(shape=(), dtype=tf.int32),
-...          tf.RaggedTensorSpec(shape=(2, None), dtype=tf.int32)))
->>>
->>> list(dataset.take(1))
-[(<tf.Tensor: shape=(), dtype=int32, numpy=42>,
-<tf.RaggedTensor [[1, 2], [3]]>)]
-```
-
-There is also a deprecated way to call `from_generator`, using either the
-`output_types` argument alone or together with the `output_shapes` argument.
-In this case the output of the function will be assumed to consist of
-`tf.Tensor` objects with the types defined by `output_types` and with the
-shapes which are either unknown or defined by `output_shapes`.
-
-Note: The current implementation of `Dataset.from_generator()` uses
-`tf.numpy_function` and inherits the same constraints. In particular, it
-requires the dataset and iterator related operations to be placed
-on a device in the same process as the Python program that called
-`Dataset.from_generator()`. The body of `generator` will not be
-serialized in a `GraphDef`, and you should not use this method if you
-need to serialize your model and restore it in a different environment.
-
-Note: If `generator` depends on mutable global variables or other external
-state, be aware that the runtime may invoke `generator` multiple times
-(in order to support repeating the `Dataset`) and at any time
-between the call to `Dataset.from_generator()` and the production of the
-first element from the generator. Mutating global variables or external
-state can cause undefined behavior, and we recommend that you explicitly
-cache any external state in `generator` before calling
-`Dataset.from_generator()`.
-
-
-
-
-
-
Args
-`generator` - -A callable object that returns an object that supports the -`iter()` protocol. If `args` is not specified, `generator` must take no -arguments; otherwise it must take as many arguments as there are values -in `args`. -
-`output_types` - -(Optional.) A (nested) structure of `tf.DType` objects -corresponding to each component of an element yielded by `generator`. -
-`output_shapes` - -(Optional.) A (nested) structure of `tf.TensorShape` -objects corresponding to each component of an element yielded by -`generator`. -
-`args` - -(Optional.) A tuple of `tf.Tensor` objects that will be evaluated -and passed to `generator` as NumPy-array arguments. -
-`output_signature` - -(Optional.) A (nested) structure of `tf.TypeSpec` -objects corresponding to each component of an element yielded by -`generator`. -
- - - - - - - - - - - - -
Returns
-`Dataset` - -A `Dataset`. -
- - - -

from_sparse_tensor_slices

- - - -Splits each rank-N `tf.sparse.SparseTensor` in this dataset row-wise. (deprecated) - -Warning: THIS FUNCTION IS DEPRECATED. It will be removed in a future version. -Instructions for updating: -Use `tf.data.Dataset.from_tensor_slices()`. - - - - - - - - - - -
Args
-`sparse_tensor` - -A `tf.sparse.SparseTensor`. -
- - - - - - - - - - - - -
Returns
-`Dataset` - -A `Dataset` of rank-(N-1) sparse tensors. -
- - - -
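-
-Since this method is deprecated, the sketch below shows the replacement named
-in the warning: `from_tensor_slices` also slices a `tf.sparse.SparseTensor`
-row-wise. The 2x3 matrix here is an illustrative assumption.
-
-```python
-import tensorflow as tf
-
-# A hypothetical 2x3 sparse matrix with two non-zero entries.
-st = tf.sparse.SparseTensor(
-    indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[2, 3])
-
-# The recommended replacement slices it into rank-1 sparse elements.
-dataset = tf.data.Dataset.from_tensor_slices(st)
-for element in dataset:
-  print(tf.sparse.to_dense(element).numpy())
-# [1 0 0]
-# [0 0 2]
-```
-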

from_tensor_slices

- - - -Creates a `Dataset` whose elements are slices of the given tensors. - -The given tensors are sliced along their first dimension. This operation -preserves the structure of the input tensors, removing the first dimension -of each tensor and using it as the dataset dimension. All input tensors -must have the same size in their first dimensions. - -``` ->>> # Slicing a 1D tensor produces scalar tensor elements. ->>> dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3]) ->>> list(dataset.as_numpy_iterator()) -[1, 2, 3] -``` - -``` ->>> # Slicing a 2D tensor produces 1D tensor elements. ->>> dataset = tf.data.Dataset.from_tensor_slices([[1, 2], [3, 4]]) ->>> list(dataset.as_numpy_iterator()) -[array([1, 2], dtype=int32), array([3, 4], dtype=int32)] -``` - -``` ->>> # Slicing a tuple of 1D tensors produces tuple elements containing ->>> # scalar tensors. ->>> dataset = tf.data.Dataset.from_tensor_slices(([1, 2], [3, 4], [5, 6])) ->>> list(dataset.as_numpy_iterator()) -[(1, 3, 5), (2, 4, 6)] -``` - -``` ->>> # Dictionary structure is also preserved. ->>> dataset = tf.data.Dataset.from_tensor_slices({"a": [1, 2], "b": [3, 4]}) ->>> list(dataset.as_numpy_iterator()) == [{'a': 1, 'b': 3}, -... {'a': 2, 'b': 4}] -True -``` - -``` ->>> # Two tensors can be combined into one Dataset object. ->>> features = tf.constant([[1, 3], [2, 1], [3, 3]]) # ==> 3x2 tensor ->>> labels = tf.constant(['A', 'B', 'A']) # ==> 3x1 tensor ->>> dataset = Dataset.from_tensor_slices((features, labels)) ->>> # Both the features and the labels tensors can be converted ->>> # to a Dataset object separately and combined after. ->>> features_dataset = Dataset.from_tensor_slices(features) ->>> labels_dataset = Dataset.from_tensor_slices(labels) ->>> dataset = Dataset.zip((features_dataset, labels_dataset)) ->>> # A batched feature and label set can be converted to a Dataset ->>> # in similar fashion. ->>> batched_features = tf.constant([[[1, 3], [2, 3]], -... [[2, 1], [1, 2]], -... [[3, 3], [3, 2]]], shape=(3, 2, 2)) ->>> batched_labels = tf.constant([['A', 'A'], -... ['B', 'B'], -... ['A', 'B']], shape=(3, 2, 1)) ->>> dataset = Dataset.from_tensor_slices((batched_features, batched_labels)) ->>> for element in dataset.as_numpy_iterator(): -... print(element) -(array([[1, 3], - [2, 3]], dtype=int32), array([[b'A'], - [b'A']], dtype=object)) -(array([[2, 1], - [1, 2]], dtype=int32), array([[b'B'], - [b'B']], dtype=object)) -(array([[3, 3], - [3, 2]], dtype=int32), array([[b'A'], - [b'B']], dtype=object)) -``` - -Note that if `tensors` contains a NumPy array, and eager execution is not -enabled, the values will be embedded in the graph as one or more -`tf.constant` operations. For large datasets (> 1 GB), this can waste -memory and run into byte limits of graph serialization. If `tensors` -contains one or more large NumPy arrays, consider the alternative described -in [this guide]( -https://tensorflow.org/guide/data#consuming_numpy_arrays). - - - - - - - - - - -
Args
-`tensors` - -A dataset element, whose components have the same first -dimension. Supported values are documented -[here](https://www.tensorflow.org/guide/data#dataset_structure). -
- - - - - - - - - - - - -
Returns
-`Dataset` - -A `Dataset`. -
- - - -

from_tensors

- - - -Creates a `Dataset` with a single element, comprising the given tensors. - -`from_tensors` produces a dataset containing only a single element. To slice -the input tensor into multiple elements, use `from_tensor_slices` instead. - -``` ->>> dataset = tf.data.Dataset.from_tensors([1, 2, 3]) ->>> list(dataset.as_numpy_iterator()) -[array([1, 2, 3], dtype=int32)] ->>> dataset = tf.data.Dataset.from_tensors(([1, 2, 3], 'A')) ->>> list(dataset.as_numpy_iterator()) -[(array([1, 2, 3], dtype=int32), b'A')] -``` - -``` ->>> # You can use `from_tensors` to produce a dataset which repeats ->>> # the same example many times. ->>> example = tf.constant([1,2,3]) ->>> dataset = tf.data.Dataset.from_tensors(example).repeat(2) ->>> list(dataset.as_numpy_iterator()) -[array([1, 2, 3], dtype=int32), array([1, 2, 3], dtype=int32)] -``` - -Note that if `tensors` contains a NumPy array, and eager execution is not -enabled, the values will be embedded in the graph as one or more -`tf.constant` operations. For large datasets (> 1 GB), this can waste -memory and run into byte limits of graph serialization. If `tensors` -contains one or more large NumPy arrays, consider the alternative described -in [this -guide](https://tensorflow.org/guide/data#consuming_numpy_arrays). - - - - - - - - - - -
Args
-`tensors` - -A dataset "element". Supported values are documented -[here](https://www.tensorflow.org/guide/data#dataset_structure). -
- - - - - - - - - - - - -
Returns
-`Dataset` - -A `Dataset`. -
- - - -

get_single_element

-
-
-
-Returns the single element of the `dataset` as a nested structure of tensors.
-
-The function enables you to use a `tf.data.Dataset` in a stateless
-"tensor-in tensor-out" expression, without creating an iterator.
-This facilitates the ease of data transformation on tensors using the
-optimized `tf.data.Dataset` abstraction on top of them.
-
-For example, let's consider a `preprocessing_fn` which takes as input
-the raw features and returns the processed feature along with its label.
-
-```python
-def preprocessing_fn(raw_feature):
-  # ... the raw_feature is preprocessed as per the use-case
-  return feature
-
-raw_features = ...  # input batch of BATCH_SIZE elements.
-dataset = (tf.data.Dataset.from_tensor_slices(raw_features)
-           .map(preprocessing_fn, num_parallel_calls=BATCH_SIZE)
-           .batch(BATCH_SIZE))
-
-processed_features = dataset.get_single_element()
-```
-
-In the above example, the `raw_features` tensor of length=BATCH_SIZE
-was converted to a `tf.data.Dataset`. Next, each of the `raw_feature` was
-mapped using the `preprocessing_fn` and the processed features were
-grouped into a single batch. The final `dataset` contains only one element
-which is a batch of all the processed features.
-
-NOTE: The `dataset` should contain only one element.
-
-Now, instead of creating an iterator for the `dataset` and retrieving the
-batch of features, the `dataset.get_single_element()` method is used
-to skip the iterator creation process and directly output the batch of
-features.
-
-This can be particularly useful when your tensor transformations are
-expressed as `tf.data.Dataset` operations, and you want to use those
-transformations while serving your model.
-
-# Keras
-
-```python
-
-model = ... # A pre-built or custom model
-
-class PreprocessingModel(tf.keras.Model):
-  def __init__(self, model):
-    super().__init__()
-    self.model = model
-
-  @tf.function(input_signature=[...])
-  def serving_fn(self, data):
-    ds = tf.data.Dataset.from_tensor_slices(data)
-    ds = ds.map(preprocessing_fn, num_parallel_calls=BATCH_SIZE)
-    ds = ds.batch(batch_size=BATCH_SIZE)
-    return tf.argmax(self.model(ds.get_single_element()), axis=-1)
-
-preprocessing_model = PreprocessingModel(model)
-your_exported_model_dir = ... # save the model to this path.
-tf.saved_model.save(preprocessing_model, your_exported_model_dir,
-          signatures={'serving_default': preprocessing_model.serving_fn}
-          )
-```
-
-# Estimator
-
-In the case of estimators, you generally need to define a `serving_input_fn`
-which would require the features to be processed by the model while
-inferencing.
-
-```python
-def serving_input_fn():
-
-  raw_feature_spec = ... # Spec for the raw_features
-  input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
-      raw_feature_spec, default_batch_size=None)
-  serving_input_receiver = input_fn()
-  raw_features = serving_input_receiver.features
-
-  def preprocessing_fn(raw_feature):
-    # ... the raw_feature is preprocessed as per the use-case
-    return feature
-
-  dataset = (tf.data.Dataset.from_tensor_slices(raw_features)
-             .map(preprocessing_fn, num_parallel_calls=BATCH_SIZE)
-             .batch(BATCH_SIZE))
-
-  processed_features = dataset.get_single_element()
-
-  # Please note that the value of `BATCH_SIZE` should be equal to
-  # the size of the leading dimension of `raw_features`. This ensures
-  # that `dataset` has only one element, which is a pre-requisite for
-  # using `dataset.get_single_element()`.
- - return tf.estimator.export.ServingInputReceiver( - processed_features, serving_input_receiver.receiver_tensors) - -estimator = ... # A pre-built or custom estimator -estimator.export_saved_model(your_exported_model_dir, serving_input_fn) -``` - - - - - - - - - -
Returns
-A nested structure of `tf.Tensor` objects, corresponding to the single -element of `dataset`. -
- - - - - - - - - - - - -
Raises
-`InvalidArgumentError` - -(at runtime) if `dataset` does not contain exactly -one element. -
- - - -

group_by_window

- - - -Groups windows of elements by key and reduces them. - -This transformation maps each consecutive element in a dataset to a key -using `key_func` and groups the elements by key. It then applies -`reduce_func` to at most `window_size_func(key)` elements matching the same -key. All except the final window for each key will contain -`window_size_func(key)` elements; the final window may be smaller. - -You may provide either a constant `window_size` or a window size determined -by the key through `window_size_func`. - -``` ->>> dataset = tf.data.Dataset.range(10) ->>> window_size = 5 ->>> key_func = lambda x: x%2 ->>> reduce_func = lambda key, dataset: dataset.batch(window_size) ->>> dataset = dataset.group_by_window( -... key_func=key_func, -... reduce_func=reduce_func, -... window_size=window_size) ->>> for elem in dataset.as_numpy_iterator(): -... print(elem) -[0 2 4 6 8] -[1 3 5 7 9] -``` - - - - - - - - - - - - - - - - - - - -
Args
-`key_func` - -A function mapping a nested structure of tensors (having shapes -and types defined by `self.output_shapes` and `self.output_types`) to a -scalar `tf.int64` tensor. -
-`reduce_func` - -A function mapping a key and a dataset of up to `window_size` -consecutive elements matching that key to another dataset. -
-`window_size` - -A `tf.int64` scalar `tf.Tensor`, representing the number of -consecutive elements matching the same key to combine in a single batch, -which will be passed to `reduce_func`. Mutually exclusive with -`window_size_func`. -
-`window_size_func` - -A function mapping a key to a `tf.int64` scalar -`tf.Tensor`, representing the number of consecutive elements matching -the same key to combine in a single batch, which will be passed to -`reduce_func`. Mutually exclusive with `window_size`. -
- - - - - - - - - - - -
Returns
-A `tf.data.Dataset` -
- - - - - - - - - - - - -
Raises
-`ValueError` - -if neither or both of {`window_size`, `window_size_func`} are -passed. -
- - - -

interleave

- - - -Maps `map_func` across this dataset, and interleaves the results. - -For example, you can use `Dataset.interleave()` to process many input files -concurrently: - -``` ->>> # Preprocess 4 files concurrently, and interleave blocks of 16 records ->>> # from each file. ->>> filenames = ["/var/data/file1.txt", "/var/data/file2.txt", -... "/var/data/file3.txt", "/var/data/file4.txt"] ->>> dataset = tf.data.Dataset.from_tensor_slices(filenames) ->>> def parse_fn(filename): -... return tf.data.Dataset.range(10) ->>> dataset = dataset.interleave(lambda x: -... tf.data.TextLineDataset(x).map(parse_fn, num_parallel_calls=1), -... cycle_length=4, block_length=16) -``` - -The `cycle_length` and `block_length` arguments control the order in which -elements are produced. `cycle_length` controls the number of input elements -that are processed concurrently. If you set `cycle_length` to 1, this -transformation will handle one input element at a time, and will produce -identical results to `tf.data.Dataset.flat_map`. In general, -this transformation will apply `map_func` to `cycle_length` input elements, -open iterators on the returned `Dataset` objects, and cycle through them -producing `block_length` consecutive elements from each iterator, and -consuming the next input element each time it reaches the end of an -iterator. - -#### For example: - - - -``` ->>> dataset = Dataset.range(1, 6) # ==> [ 1, 2, 3, 4, 5 ] ->>> # NOTE: New lines indicate "block" boundaries. ->>> dataset = dataset.interleave( -... lambda x: Dataset.from_tensors(x).repeat(6), -... cycle_length=2, block_length=4) ->>> list(dataset.as_numpy_iterator()) -[1, 1, 1, 1, - 2, 2, 2, 2, - 1, 1, - 2, 2, - 3, 3, 3, 3, - 4, 4, 4, 4, - 3, 3, - 4, 4, - 5, 5, 5, 5, - 5, 5] -``` - -Note: The order of elements yielded by this transformation is -deterministic, as long as `map_func` is a pure function and -`deterministic=True`. If `map_func` contains any stateful operations, the -order in which that state is accessed is undefined. - -Performance can often be improved by setting `num_parallel_calls` so that -`interleave` will use multiple threads to fetch elements. If determinism -isn't required, it can also improve performance to set -`deterministic=False`. - -``` ->>> filenames = ["/var/data/file1.txt", "/var/data/file2.txt", -... "/var/data/file3.txt", "/var/data/file4.txt"] ->>> dataset = tf.data.Dataset.from_tensor_slices(filenames) ->>> dataset = dataset.interleave(lambda x: tf.data.TFRecordDataset(x), -... cycle_length=4, num_parallel_calls=tf.data.AUTOTUNE, -... deterministic=False) -``` - - - - - - - - - - - - - - - - - - - - - - -
Args
-`map_func` - -A function mapping a dataset element to a dataset. -
-`cycle_length` - -(Optional.) The number of input elements that will be -processed concurrently. If not set, the tf.data runtime decides what it -should be based on available CPU. If `num_parallel_calls` is set to -`tf.data.AUTOTUNE`, the `cycle_length` argument identifies -the maximum degree of parallelism. -
-`block_length` - -(Optional.) The number of consecutive elements to produce -from each input element before cycling to another input element. If not -set, defaults to 1. -
-`num_parallel_calls` - -(Optional.) If specified, the implementation creates a -threadpool, which is used to fetch inputs from cycle elements -asynchronously and in parallel. The default behavior is to fetch inputs -from cycle elements synchronously with no parallelism. If the value -`tf.data.AUTOTUNE` is used, then the number of parallel -calls is set dynamically based on available CPU. -
-`deterministic` - -(Optional.) When `num_parallel_calls` is specified, if this -boolean is specified (`True` or `False`), it controls the order in which -the transformation produces elements. If set to `False`, the -transformation is allowed to yield elements out of order to trade -determinism for performance. If not specified, the -`tf.data.Options.experimental_deterministic` option -(`True` by default) controls the behavior. -
- - - - - - - - - - - - -
Returns
-`Dataset` - -A `Dataset`. -
- - - -

list_files

-
-
-
-A dataset of all files matching one or more glob patterns.
-
-The `file_pattern` argument should be a small number of glob patterns.
-If your filenames have already been globbed, use
-`Dataset.from_tensor_slices(filenames)` instead, as re-globbing every
-filename with `list_files` may result in poor performance with remote
-storage systems.
-
-Note: The default behavior of this method is to return filenames in
-a non-deterministic random shuffled order. Pass a `seed` or `shuffle=False`
-to get results in a deterministic order.
-
-#### Example:
-
-If we had the following files on our filesystem:
-
-  - /path/to/dir/a.txt
-  - /path/to/dir/b.py
-  - /path/to/dir/c.py
-
-If we pass "/path/to/dir/*.py" as the `file_pattern`, the dataset
-would produce:
-
-  - /path/to/dir/b.py
-  - /path/to/dir/c.py
-
-
-
-
-
-
Args
-`file_pattern` - -A string, a list of strings, or a `tf.Tensor` of string type -(scalar or vector), representing the filename glob (i.e. shell wildcard) -pattern(s) that will be matched. -
-`shuffle` - -(Optional.) If `True`, the file names will be shuffled randomly. -Defaults to `True`. -
-`seed` - -(Optional.) A `tf.int64` scalar `tf.Tensor`, representing the random -seed that will be used to create the distribution. See -`tf.random.set_seed` for behavior. -
- - - - - - - - - - - - -
Returns
-`Dataset` - -A `Dataset` of strings corresponding to file names. -
- - - -
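-
-A minimal runnable sketch of the example above; the paths are the
-illustrative ones listed, and `shuffle=False` gives the deterministic
-order the note recommends.
-
-```python
-import tensorflow as tf
-
-# Hypothetical paths matching the example above.
-dataset = tf.data.Dataset.list_files("/path/to/dir/*.py", shuffle=False)
-for filename in dataset:
-  print(filename.numpy())
-# b'/path/to/dir/b.py'
-# b'/path/to/dir/c.py'
-```
-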

make_initializable_iterator

- - - -Creates an iterator for elements of this dataset. (deprecated) - -Warning: THIS FUNCTION IS DEPRECATED. It will be removed in a future version. -Instructions for updating: -This is a deprecated API that should only be used in TF 1 graph mode and legacy TF 2 graph mode available through `tf.compat.v1`. In all other situations -- namely, eager mode and inside `tf.function` -- you can consume dataset elements using `for elem in dataset: ...` or by explicitly creating iterator via `iterator = iter(dataset)` and fetching its elements via `values = next(iterator)`. Furthermore, this API is not available in TF 2. During the transition from TF 1 to TF 2 you can use `tf.compat.v1.data.make_initializable_iterator(dataset)` to create a TF 1 graph mode style iterator for a dataset created through TF 2 APIs. Note that this should be a transient state of your code base as there are in general no guarantees about the interoperability of TF 1 and TF 2 code. - -Note: The returned iterator will be in an uninitialized state, -and you must run the `iterator.initializer` operation before using it: - -```python -# Building graph ... -dataset = ... -iterator = dataset.make_initializable_iterator() -next_value = iterator.get_next() # This is a Tensor. - -# ... from within a session ... -sess.run(iterator.initializer) -try: - while True: - value = sess.run(next_value) - ... -except tf.errors.OutOfRangeError: - pass -``` - - - - - - - - - - -
Args
-`shared_name` - -(Optional.) If non-empty, the returned iterator will be -shared under the given name across multiple sessions that share the same -devices (e.g. when using a remote server). -
- - - - - - - - - - - -
Returns
-A `tf.data.Iterator` for elements of this dataset. -
- - - - - - - - - - - - -
Raises
-`RuntimeError` - -If eager execution is enabled. -
- - - -

make_one_shot_iterator

- - - -Creates an iterator for elements of this dataset. (deprecated) - -Warning: THIS FUNCTION IS DEPRECATED. It will be removed in a future version. -Instructions for updating: -This is a deprecated API that should only be used in TF 1 graph mode and legacy TF 2 graph mode available through `tf.compat.v1`. In all other situations -- namely, eager mode and inside `tf.function` -- you can consume dataset elements using `for elem in dataset: ...` or by explicitly creating iterator via `iterator = iter(dataset)` and fetching its elements via `values = next(iterator)`. Furthermore, this API is not available in TF 2. During the transition from TF 1 to TF 2 you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)` to create a TF 1 graph mode style iterator for a dataset created through TF 2 APIs. Note that this should be a transient state of your code base as there are in general no guarantees about the interoperability of TF 1 and TF 2 code. - -Note: The returned iterator will be initialized automatically. -A "one-shot" iterator does not currently support re-initialization. For -that see `make_initializable_iterator`. - -#### Example: - - - -```python -# Building graph ... -dataset = ... -next_value = dataset.make_one_shot_iterator().get_next() - -# ... from within a session ... -try: - while True: - value = sess.run(next_value) - ... -except tf.errors.OutOfRangeError: - pass -``` - - - - - - - - - -
Returns
-A `tf.data.Iterator` for elements of this dataset.
-
- - - -

map

- - - -Maps `map_func` across the elements of this dataset. - -This transformation applies `map_func` to each element of this dataset, and -returns a new dataset containing the transformed elements, in the same -order as they appeared in the input. `map_func` can be used to change both -the values and the structure of a dataset's elements. Supported structure -constructs are documented -[here](https://www.tensorflow.org/guide/data#dataset_structure). - -For example, `map` can be used for adding 1 to each element, or projecting a -subset of element components. - -``` ->>> dataset = Dataset.range(1, 6) # ==> [ 1, 2, 3, 4, 5 ] ->>> dataset = dataset.map(lambda x: x + 1) ->>> list(dataset.as_numpy_iterator()) -[2, 3, 4, 5, 6] -``` - -The input signature of `map_func` is determined by the structure of each -element in this dataset. - -``` ->>> dataset = Dataset.range(5) ->>> # `map_func` takes a single argument of type `tf.Tensor` with the same ->>> # shape and dtype. ->>> result = dataset.map(lambda x: x + 1) -``` - -``` ->>> # Each element is a tuple containing two `tf.Tensor` objects. ->>> elements = [(1, "foo"), (2, "bar"), (3, "baz")] ->>> dataset = tf.data.Dataset.from_generator( -... lambda: elements, (tf.int32, tf.string)) ->>> # `map_func` takes two arguments of type `tf.Tensor`. This function ->>> # projects out just the first component. ->>> result = dataset.map(lambda x_int, y_str: x_int) ->>> list(result.as_numpy_iterator()) -[1, 2, 3] -``` - -``` ->>> # Each element is a dictionary mapping strings to `tf.Tensor` objects. ->>> elements = ([{"a": 1, "b": "foo"}, -... {"a": 2, "b": "bar"}, -... {"a": 3, "b": "baz"}]) ->>> dataset = tf.data.Dataset.from_generator( -... lambda: elements, {"a": tf.int32, "b": tf.string}) ->>> # `map_func` takes a single argument of type `dict` with the same keys ->>> # as the elements. ->>> result = dataset.map(lambda d: str(d["a"]) + d["b"]) -``` - -The value or values returned by `map_func` determine the structure of each -element in the returned dataset. - -``` ->>> dataset = tf.data.Dataset.range(3) ->>> # `map_func` returns two `tf.Tensor` objects. ->>> def g(x): -... return tf.constant(37.0), tf.constant(["Foo", "Bar", "Baz"]) ->>> result = dataset.map(g) ->>> result.element_spec -(TensorSpec(shape=(), dtype=tf.float32, name=None), TensorSpec(shape=(3,), dtype=tf.string, name=None)) ->>> # Python primitives, lists, and NumPy arrays are implicitly converted to ->>> # `tf.Tensor`. ->>> def h(x): -... return 37.0, ["Foo", "Bar"], np.array([1.0, 2.0], dtype=np.float64) ->>> result = dataset.map(h) ->>> result.element_spec -(TensorSpec(shape=(), dtype=tf.float32, name=None), TensorSpec(shape=(2,), dtype=tf.string, name=None), TensorSpec(shape=(2,), dtype=tf.float64, name=None)) ->>> # `map_func` can return nested structures. ->>> def i(x): -... return (37.0, [42, 16]), "foo" ->>> result = dataset.map(i) ->>> result.element_spec -((TensorSpec(shape=(), dtype=tf.float32, name=None), - TensorSpec(shape=(2,), dtype=tf.int32, name=None)), - TensorSpec(shape=(), dtype=tf.string, name=None)) -``` - -`map_func` can accept as arguments and return any type of dataset element. - -Note that irrespective of the context in which `map_func` is defined (eager -vs. graph), tf.data traces the function and executes it as a graph. To use -Python code inside of the function you have a few options: - -1) Rely on AutoGraph to convert Python code into an equivalent graph -computation. The downside of this approach is that AutoGraph can convert -some but not all Python code. 
- -2) Use `tf.py_function`, which allows you to write arbitrary Python code but -will generally result in worse performance than 1). For example: - -``` ->>> d = tf.data.Dataset.from_tensor_slices(['hello', 'world']) ->>> # transform a string tensor to upper case string using a Python function ->>> def upper_case_fn(t: tf.Tensor): -... return t.numpy().decode('utf-8').upper() ->>> d = d.map(lambda x: tf.py_function(func=upper_case_fn, -... inp=[x], Tout=tf.string)) ->>> list(d.as_numpy_iterator()) -[b'HELLO', b'WORLD'] -``` - -3) Use `tf.numpy_function`, which also allows you to write arbitrary -Python code. Note that `tf.py_function` accepts `tf.Tensor` whereas -`tf.numpy_function` accepts numpy arrays and returns only numpy arrays. -For example: - -``` ->>> d = tf.data.Dataset.from_tensor_slices(['hello', 'world']) ->>> def upper_case_fn(t: np.ndarray): -... return t.decode('utf-8').upper() ->>> d = d.map(lambda x: tf.numpy_function(func=upper_case_fn, -... inp=[x], Tout=tf.string)) ->>> list(d.as_numpy_iterator()) -[b'HELLO', b'WORLD'] -``` - -Note that the use of `tf.numpy_function` and `tf.py_function` -in general precludes the possibility of executing user-defined -transformations in parallel (because of Python GIL). - -Performance can often be improved by setting `num_parallel_calls` so that -`map` will use multiple threads to process elements. If deterministic order -isn't required, it can also improve performance to set -`deterministic=False`. - -``` ->>> dataset = Dataset.range(1, 6) # ==> [ 1, 2, 3, 4, 5 ] ->>> dataset = dataset.map(lambda x: x + 1, -... num_parallel_calls=tf.data.AUTOTUNE, -... deterministic=False) -``` - -The order of elements yielded by this transformation is deterministic if -`deterministic=True`. If `map_func` contains stateful operations and -`num_parallel_calls > 1`, the order in which that state is accessed is -undefined, so the values of output elements may not be deterministic -regardless of the `deterministic` flag value. - - - - - - - - - - - - - - - - -
Args
-`map_func` - -A function mapping a dataset element to another dataset element. -
-`num_parallel_calls` - -(Optional.) A `tf.int64` scalar `tf.Tensor`, -representing the number elements to process asynchronously in parallel. -If not specified, elements will be processed sequentially. If the value -`tf.data.AUTOTUNE` is used, then the number of parallel -calls is set dynamically based on available CPU. -
-`deterministic` - -(Optional.) When `num_parallel_calls` is specified, if this -boolean is specified (`True` or `False`), it controls the order in which -the transformation produces elements. If set to `False`, the -transformation is allowed to yield elements out of order to trade -determinism for performance. If not specified, the -`tf.data.Options.experimental_deterministic` option -(`True` by default) controls the behavior. -
- - - - - - - - - - - - -
Returns
-`Dataset` - -A `Dataset`. -
- - - -

map_with_legacy_function

-
-
-
-Maps `map_func` across the elements of this dataset. (deprecated)
-
-Warning: THIS FUNCTION IS DEPRECATED. It will be removed in a future version.
-Instructions for updating:
-Use `tf.data.Dataset.map()`.
-
-Note: This is an escape hatch for existing uses of `map` that do not work
-with V2 functions. New uses are strongly discouraged and existing uses
-should migrate to `map` as this method will be removed in V2.
-
-
-
-
-
-
Args
-`map_func` - -A function mapping a (nested) structure of tensors (having -shapes and types defined by `self.output_shapes` and -`self.output_types`) to another (nested) structure of tensors. -
-`num_parallel_calls` - -(Optional.) A `tf.int32` scalar `tf.Tensor`, -representing the number elements to process asynchronously in parallel. -If not specified, elements will be processed sequentially. If the value -`tf.data.AUTOTUNE` is used, then the number of parallel -calls is set dynamically based on available CPU. -
-`deterministic` - -(Optional.) When `num_parallel_calls` is specified, this -boolean controls the order in which the transformation produces -elements. If set to `False`, the transformation is allowed to yield -elements out of order to trade determinism for performance. If not -specified, the `tf.data.Options.experimental_deterministic` option -(`True` by default) controls the behavior. -
- - - - - - - - - - - - -
Returns
-`Dataset` - -A `Dataset`. -
- - - -

options

- - - -Returns the options for this dataset and its inputs. - - - - - - - - - - -
Returns
-A `tf.data.Options` object representing the dataset options. -
- - - -
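-
-A minimal sketch showing how `options` reflects options set via
-`with_options` (documented below); the option name mirrors the one used
-in the `interleave` and `map` docs above.
-
-```python
-import tensorflow as tf
-
-dataset = tf.data.Dataset.range(5)
-options = tf.data.Options()
-options.experimental_deterministic = False
-dataset = dataset.with_options(options)
-
-# `options()` returns the merged options for the dataset and its inputs.
-print(dataset.options().experimental_deterministic)  # False
-```
-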

padded_batch

- - - -Combines consecutive elements of this dataset into padded batches. - -This transformation combines multiple consecutive elements of the input -dataset into a single element. - -Like `tf.data.Dataset.batch`, the components of the resulting element will -have an additional outer dimension, which will be `batch_size` (or -`N % batch_size` for the last element if `batch_size` does not divide the -number of input elements `N` evenly and `drop_remainder` is `False`). If -your program depends on the batches having the same outer dimension, you -should set the `drop_remainder` argument to `True` to prevent the smaller -batch from being produced. - -Unlike `tf.data.Dataset.batch`, the input elements to be batched may have -different shapes, and this transformation will pad each component to the -respective shape in `padded_shapes`. The `padded_shapes` argument -determines the resulting shape for each dimension of each component in an -output element: - -* If the dimension is a constant, the component will be padded out to that - length in that dimension. -* If the dimension is unknown, the component will be padded out to the - maximum length of all elements in that dimension. - -``` ->>> A = (tf.data.Dataset -... .range(1, 5, output_type=tf.int32) -... .map(lambda x: tf.fill([x], x))) ->>> # Pad to the smallest per-batch size that fits all elements. ->>> B = A.padded_batch(2) ->>> for element in B.as_numpy_iterator(): -... print(element) -[[1 0] - [2 2]] -[[3 3 3 0] - [4 4 4 4]] ->>> # Pad to a fixed size. ->>> C = A.padded_batch(2, padded_shapes=5) ->>> for element in C.as_numpy_iterator(): -... print(element) -[[1 0 0 0 0] - [2 2 0 0 0]] -[[3 3 3 0 0] - [4 4 4 4 0]] ->>> # Pad with a custom value. ->>> D = A.padded_batch(2, padded_shapes=5, padding_values=-1) ->>> for element in D.as_numpy_iterator(): -... print(element) -[[ 1 -1 -1 -1 -1] - [ 2 2 -1 -1 -1]] -[[ 3 3 3 -1 -1] - [ 4 4 4 4 -1]] ->>> # Components of nested elements can be padded independently. ->>> elements = [([1, 2, 3], [10]), -... ([4, 5], [11, 12])] ->>> dataset = tf.data.Dataset.from_generator( -... lambda: iter(elements), (tf.int32, tf.int32)) ->>> # Pad the first component of the tuple to length 4, and the second ->>> # component to the smallest size that fits. ->>> dataset = dataset.padded_batch(2, -... padded_shapes=([4], [None]), -... padding_values=(-1, 100)) ->>> list(dataset.as_numpy_iterator()) -[(array([[ 1, 2, 3, -1], [ 4, 5, -1, -1]], dtype=int32), - array([[ 10, 100], [ 11, 12]], dtype=int32))] ->>> # Pad with a single value and multiple components. ->>> E = tf.data.Dataset.zip((A, A)).padded_batch(2, padding_values=-1) ->>> for element in E.as_numpy_iterator(): -... print(element) -(array([[ 1, -1], - [ 2, 2]], dtype=int32), array([[ 1, -1], - [ 2, 2]], dtype=int32)) -(array([[ 3, 3, 3, -1], - [ 4, 4, 4, 4]], dtype=int32), array([[ 3, 3, 3, -1], - [ 4, 4, 4, 4]], dtype=int32)) -``` - -See also `tf.data.experimental.dense_to_sparse_batch`, which combines -elements that may have different shapes into a `tf.sparse.SparseTensor`. - - - - - - - - - - - - - - - - - - - -
Args
-`batch_size` - -A `tf.int64` scalar `tf.Tensor`, representing the number of -consecutive elements of this dataset to combine in a single batch. -
-`padded_shapes` - -(Optional.) A (nested) structure of `tf.TensorShape` or -`tf.int64` vector tensor-like objects representing the shape to which -the respective component of each input element should be padded prior -to batching. Any unknown dimensions will be padded to the maximum size -of that dimension in each batch. If unset, all dimensions of all -components are padded to the maximum size in the batch. `padded_shapes` -must be set if any component has an unknown rank. -
-`padding_values` - -(Optional.) A (nested) structure of scalar-shaped -`tf.Tensor`, representing the padding values to use for the respective -components. None represents that the (nested) structure should be padded -with default values. Defaults are `0` for numeric types and the empty -string for string types. The `padding_values` should have the same -(nested) structure as the input dataset. If `padding_values` is a single -element and the input dataset has multiple components, then the same -`padding_values` will be used to pad every component of the dataset. -If `padding_values` is a scalar, then its value will be broadcasted -to match the shape of each component. -
-`drop_remainder` - -(Optional.) A `tf.bool` scalar `tf.Tensor`, representing -whether the last batch should be dropped in the case it has fewer than -`batch_size` elements; the default behavior is not to drop the smaller -batch. -
- - - - - - - - - - - - -
Returns
-`Dataset` - -A `Dataset`. -
- - - - - - - - - - - - -
Raises
-`ValueError` - -If a component has an unknown rank, and the `padded_shapes` -argument is not set. -
- - - -

prefetch

- - - -Creates a `Dataset` that prefetches elements from this dataset. - -Most dataset input pipelines should end with a call to `prefetch`. This -allows later elements to be prepared while the current element is being -processed. This often improves latency and throughput, at the cost of -using additional memory to store prefetched elements. - -Note: Like other `Dataset` methods, prefetch operates on the -elements of the input dataset. It has no concept of examples vs. batches. -`examples.prefetch(2)` will prefetch two elements (2 examples), -while `examples.batch(20).prefetch(2)` will prefetch 2 elements -(2 batches, of 20 examples each). - -``` ->>> dataset = tf.data.Dataset.range(3) ->>> dataset = dataset.prefetch(2) ->>> list(dataset.as_numpy_iterator()) -[0, 1, 2] -``` - - - - - - - - - - -
Args
-`buffer_size` - -A `tf.int64` scalar `tf.Tensor`, representing the maximum -number of elements that will be buffered when prefetching. If the value -`tf.data.AUTOTUNE` is used, then the buffer size is dynamically tuned. -
- - - - - - - - - - - - -
Returns
-`Dataset` - -A `Dataset`. -
- - - -

range

- - - -Creates a `Dataset` of a step-separated range of values. - -``` ->>> list(Dataset.range(5).as_numpy_iterator()) -[0, 1, 2, 3, 4] ->>> list(Dataset.range(2, 5).as_numpy_iterator()) -[2, 3, 4] ->>> list(Dataset.range(1, 5, 2).as_numpy_iterator()) -[1, 3] ->>> list(Dataset.range(1, 5, -2).as_numpy_iterator()) -[] ->>> list(Dataset.range(5, 1).as_numpy_iterator()) -[] ->>> list(Dataset.range(5, 1, -2).as_numpy_iterator()) -[5, 3] ->>> list(Dataset.range(2, 5, output_type=tf.int32).as_numpy_iterator()) -[2, 3, 4] ->>> list(Dataset.range(1, 5, 2, output_type=tf.float32).as_numpy_iterator()) -[1.0, 3.0] -``` - - - - - - - - - - - - - -
Args
-`*args`
-
-Follows the same semantics as Python's `range`.
-len(args) == 1 -> start = 0, stop = args[0], step = 1.
-len(args) == 2 -> start = args[0], stop = args[1], step = 1.
-len(args) == 3 -> start = args[0], stop = args[1], step = args[2].
-
-`**kwargs` - -- output_type: Its expected dtype. (Optional, default: `tf.int64`). -
- - - - - - - - - - - - -
Returns
-`Dataset` - -A `RangeDataset`. -
- - - - - - - - - - - - -
Raises
-`ValueError` - -if len(args) == 0. -
- - - -

reduce

- - - -Reduces the input dataset to a single element. - -The transformation calls `reduce_func` successively on every element of -the input dataset until the dataset is exhausted, aggregating information in -its internal state. The `initial_state` argument is used for the initial -state and the final state is returned as the result. - -``` ->>> tf.data.Dataset.range(5).reduce(np.int64(0), lambda x, _: x + 1).numpy() -5 ->>> tf.data.Dataset.range(5).reduce(np.int64(0), lambda x, y: x + y).numpy() -10 -``` - - - - - - - - - - - - - -
Args
-`initial_state` - -An element representing the initial state of the -transformation. -
-`reduce_func`
-
-A function that maps `(old_state, input_element)` to
-`new_state`. It must take two arguments and return a new element.
-The structure of `new_state` must match the structure of
-`initial_state`.
-
- - - - - - - - - - - -
Returns
-A dataset element corresponding to the final state of the transformation. -
- - - -

repeat

- - - -Repeats this dataset so each original value is seen `count` times. - -``` ->>> dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3]) ->>> dataset = dataset.repeat(3) ->>> list(dataset.as_numpy_iterator()) -[1, 2, 3, 1, 2, 3, 1, 2, 3] -``` - -Note: If this dataset is a function of global state (e.g. a random number -generator), then different repetitions may produce different elements. - - - - - - - - - - -
Args
-`count` - -(Optional.) A `tf.int64` scalar `tf.Tensor`, representing the -number of times the dataset should be repeated. The default behavior (if -`count` is `None` or `-1`) is for the dataset be repeated indefinitely. -
- - - - - - - - - - - - -
Returns
-`Dataset` - -A `Dataset`. -
- - - -

shard

- - - -Creates a `Dataset` that includes only 1/`num_shards` of this dataset. - -`shard` is deterministic. The Dataset produced by `A.shard(n, i)` will -contain all elements of A whose index mod n = i. - -``` ->>> A = tf.data.Dataset.range(10) ->>> B = A.shard(num_shards=3, index=0) ->>> list(B.as_numpy_iterator()) -[0, 3, 6, 9] ->>> C = A.shard(num_shards=3, index=1) ->>> list(C.as_numpy_iterator()) -[1, 4, 7] ->>> D = A.shard(num_shards=3, index=2) ->>> list(D.as_numpy_iterator()) -[2, 5, 8] -``` - -This dataset operator is very useful when running distributed training, as -it allows each worker to read a unique subset. - -When reading a single input file, you can shard elements as follows: - -```python -d = tf.data.TFRecordDataset(input_file) -d = d.shard(num_workers, worker_index) -d = d.repeat(num_epochs) -d = d.shuffle(shuffle_buffer_size) -d = d.map(parser_fn, num_parallel_calls=num_map_threads) -``` - -#### Important caveats: - - - -- Be sure to shard before you use any randomizing operator (such as - shuffle). -- Generally it is best if the shard operator is used early in the dataset - pipeline. For example, when reading from a set of TFRecord files, shard - before converting the dataset to input samples. This avoids reading every - file on every worker. The following is an example of an efficient - sharding strategy within a complete pipeline: - -```python -d = Dataset.list_files(pattern) -d = d.shard(num_workers, worker_index) -d = d.repeat(num_epochs) -d = d.shuffle(shuffle_buffer_size) -d = d.interleave(tf.data.TFRecordDataset, - cycle_length=num_readers, block_length=1) -d = d.map(parser_fn, num_parallel_calls=num_map_threads) -``` - - - - - - - - - - - - - -
Args
-`num_shards` - -A `tf.int64` scalar `tf.Tensor`, representing the number of -shards operating in parallel. -
-`index` - -A `tf.int64` scalar `tf.Tensor`, representing the worker index. -
- - - - - - - - - - - - -
Returns
-`Dataset` - -A `Dataset`. -
- - - - - - - - - - - - -
Raises
-`InvalidArgumentError`
-
-if `num_shards` or `index` are illegal values.
-
-Note: error checking is done on a best-effort basis, and errors aren't
-guaranteed to be caught upon dataset creation. (e.g. providing a
-placeholder tensor bypasses the early checking, and will instead result
-in an error during a session.run call.)
-
- - - -

shuffle

- - - -Randomly shuffles the elements of this dataset. - -This dataset fills a buffer with `buffer_size` elements, then randomly -samples elements from this buffer, replacing the selected elements with new -elements. For perfect shuffling, a buffer size greater than or equal to the -full size of the dataset is required. - -For instance, if your dataset contains 10,000 elements but `buffer_size` is -set to 1,000, then `shuffle` will initially select a random element from -only the first 1,000 elements in the buffer. Once an element is selected, -its space in the buffer is replaced by the next (i.e. 1,001-st) element, -maintaining the 1,000 element buffer. - -`reshuffle_each_iteration` controls whether the shuffle order should be -different for each epoch. In TF 1.X, the idiomatic way to create epochs -was through the `repeat` transformation: - -```python -dataset = tf.data.Dataset.range(3) -dataset = dataset.shuffle(3, reshuffle_each_iteration=True) -dataset = dataset.repeat(2) -# [1, 0, 2, 1, 2, 0] - -dataset = tf.data.Dataset.range(3) -dataset = dataset.shuffle(3, reshuffle_each_iteration=False) -dataset = dataset.repeat(2) -# [1, 0, 2, 1, 0, 2] -``` - -In TF 2.0, `tf.data.Dataset` objects are Python iterables which makes it -possible to also create epochs through Python iteration: - -```python -dataset = tf.data.Dataset.range(3) -dataset = dataset.shuffle(3, reshuffle_each_iteration=True) -list(dataset.as_numpy_iterator()) -# [1, 0, 2] -list(dataset.as_numpy_iterator()) -# [1, 2, 0] -``` - -```python -dataset = tf.data.Dataset.range(3) -dataset = dataset.shuffle(3, reshuffle_each_iteration=False) -list(dataset.as_numpy_iterator()) -# [1, 0, 2] -list(dataset.as_numpy_iterator()) -# [1, 0, 2] -``` - - - - - - - - - - - - - - - - -
Args
-`buffer_size` - -A `tf.int64` scalar `tf.Tensor`, representing the number of -elements from this dataset from which the new dataset will sample. -
-`seed` - -(Optional.) A `tf.int64` scalar `tf.Tensor`, representing the random -seed that will be used to create the distribution. See -`tf.random.set_seed` for behavior. -
-`reshuffle_each_iteration` - -(Optional.) A boolean, which if true indicates -that the dataset should be pseudorandomly reshuffled each time it is -iterated over. (Defaults to `True`.) -
- - - - - - - - - - - - -
Returns
-`Dataset` - -A `Dataset`. -
- - - -

skip

- - - -Creates a `Dataset` that skips `count` elements from this dataset. - -``` ->>> dataset = tf.data.Dataset.range(10) ->>> dataset = dataset.skip(7) ->>> list(dataset.as_numpy_iterator()) -[7, 8, 9] -``` - - - - - - - - - - -
Args
-`count` - -A `tf.int64` scalar `tf.Tensor`, representing the number of -elements of this dataset that should be skipped to form the new dataset. -If `count` is greater than the size of this dataset, the new dataset -will contain no elements. If `count` is -1, skips the entire dataset. -
- - - - - - - - - - - - -
Returns
-`Dataset` - -A `Dataset`. -
- - - -

take

- - - -Creates a `Dataset` with at most `count` elements from this dataset. - -``` ->>> dataset = tf.data.Dataset.range(10) ->>> dataset = dataset.take(3) ->>> list(dataset.as_numpy_iterator()) -[0, 1, 2] -``` - - - - - - - - - - -
Args
-`count` - -A `tf.int64` scalar `tf.Tensor`, representing the number of -elements of this dataset that should be taken to form the new dataset. -If `count` is -1, or if `count` is greater than the size of this -dataset, the new dataset will contain all elements of this dataset. -
- - - - - - - - - - - - -
Returns
-`Dataset` - -A `Dataset`. -
- - - -

unbatch

- - - -Splits elements of a dataset into multiple elements. - -For example, if elements of the dataset are shaped `[B, a0, a1, ...]`, -where `B` may vary for each input element, then for each element in the -dataset, the unbatched dataset will contain `B` consecutive elements -of shape `[a0, a1, ...]`. - -``` ->>> elements = [ [1, 2, 3], [1, 2], [1, 2, 3, 4] ] ->>> dataset = tf.data.Dataset.from_generator(lambda: elements, tf.int64) ->>> dataset = dataset.unbatch() ->>> list(dataset.as_numpy_iterator()) -[1, 2, 3, 1, 2, 1, 2, 3, 4] -``` - -Note: `unbatch` requires a data copy to slice up the batched tensor into -smaller, unbatched tensors. When optimizing performance, try to avoid -unnecessary usage of `unbatch`. - - - - - - - - - -
Returns
-A `Dataset`. -
- - - -

window

- - - -Combines (nests of) input elements into a dataset of (nests of) windows. - -A "window" is a finite dataset of flat elements of size `size` (or possibly -fewer if there are not enough input elements to fill the window and -`drop_remainder` evaluates to `False`). - -The `shift` argument determines the number of input elements by which the -window moves on each iteration. If windows and elements are both numbered -starting at 0, the first element in window `k` will be element `k * shift` -of the input dataset. In particular, the first element of the first window -will always be the first element of the input dataset. - -The `stride` argument determines the stride of the input elements, and the -`shift` argument determines the shift of the window. - -#### For example: - - - -``` ->>> dataset = tf.data.Dataset.range(7).window(2) ->>> for window in dataset: -... print(list(window.as_numpy_iterator())) -[0, 1] -[2, 3] -[4, 5] -[6] ->>> dataset = tf.data.Dataset.range(7).window(3, 2, 1, True) ->>> for window in dataset: -... print(list(window.as_numpy_iterator())) -[0, 1, 2] -[2, 3, 4] -[4, 5, 6] ->>> dataset = tf.data.Dataset.range(7).window(3, 1, 2, True) ->>> for window in dataset: -... print(list(window.as_numpy_iterator())) -[0, 2, 4] -[1, 3, 5] -[2, 4, 6] -``` - -Note that when the `window` transformation is applied to a dataset of -nested elements, it produces a dataset of nested windows. - -``` ->>> nested = ([1, 2, 3, 4], [5, 6, 7, 8]) ->>> dataset = tf.data.Dataset.from_tensor_slices(nested).window(2) ->>> for window in dataset: -... def to_numpy(ds): -... return list(ds.as_numpy_iterator()) -... print(tuple(to_numpy(component) for component in window)) -([1, 2], [5, 6]) -([3, 4], [7, 8]) -``` - -``` ->>> dataset = tf.data.Dataset.from_tensor_slices({'a': [1, 2, 3, 4]}) ->>> dataset = dataset.window(2) ->>> for window in dataset: -... def to_numpy(ds): -... return list(ds.as_numpy_iterator()) -... print({'a': to_numpy(window['a'])}) -{'a': [1, 2]} -{'a': [3, 4]} -``` - - - - - - - - - - - - - - - - - - - -
Args
-`size` - -A `tf.int64` scalar `tf.Tensor`, representing the number of elements -of the input dataset to combine into a window. Must be positive. -
-`shift` - -(Optional.) A `tf.int64` scalar `tf.Tensor`, representing the -number of input elements by which the window moves in each iteration. -Defaults to `size`. Must be positive. -
-`stride` - -(Optional.) A `tf.int64` scalar `tf.Tensor`, representing the -stride of the input elements in the sliding window. Must be positive. -The default value of 1 means "retain every input element". -
-`drop_remainder` - -(Optional.) A `tf.bool` scalar `tf.Tensor`, representing -whether the last windows should be dropped if their size is smaller than -`size`. -
- - - - - - - - - - - - -
Returns
-`Dataset`
-
-A `Dataset` of (nests of) windows -- finite datasets of flat
-elements created from the (nests of) input elements.
-
- - - -

with_options

- - - -Returns a new `tf.data.Dataset` with the given options set. - -The options are "global" in the sense they apply to the entire dataset. -If options are set multiple times, they are merged as long as different -options do not use different non-default values. - -``` ->>> ds = tf.data.Dataset.range(5) ->>> ds = ds.interleave(lambda x: tf.data.Dataset.range(5), -... cycle_length=3, -... num_parallel_calls=3) ->>> options = tf.data.Options() ->>> # This will make the interleave order non-deterministic. ->>> options.experimental_deterministic = False ->>> ds = ds.with_options(options) -``` - - - - - - - - - - -
Args
-`options`
-
-A `tf.data.Options` that identifies the options to use.
-
- - - - - - - - - - - - -
Returns
-`Dataset` - -A `Dataset` with the given options. -
- - - - - - - - - - - - -
Raises
-`ValueError` - -when an option is set more than once to a non-default value -
- - - -

zip

-
-
-
-Creates a `Dataset` by zipping together the given datasets.
-
-This method has similar semantics to the built-in `zip()` function
-in Python, with the main difference being that the `datasets`
-argument can be a (nested) structure of `Dataset` objects. The supported
-nesting mechanisms are documented
-[here](https://www.tensorflow.org/guide/data#dataset_structure).
-
-```
->>> # The nested structure of the `datasets` argument determines the
->>> # structure of elements in the resulting dataset.
->>> a = tf.data.Dataset.range(1, 4)  # ==> [ 1, 2, 3 ]
->>> b = tf.data.Dataset.range(4, 7)  # ==> [ 4, 5, 6 ]
->>> ds = tf.data.Dataset.zip((a, b))
->>> list(ds.as_numpy_iterator())
-[(1, 4), (2, 5), (3, 6)]
->>> ds = tf.data.Dataset.zip((b, a))
->>> list(ds.as_numpy_iterator())
-[(4, 1), (5, 2), (6, 3)]
->>>
->>> # The `datasets` argument may contain an arbitrary number of datasets.
->>> c = tf.data.Dataset.range(7, 13).batch(2)  # ==> [ [7, 8],
-...                                            #      [9, 10],
-...                                            #      [11, 12] ]
->>> ds = tf.data.Dataset.zip((a, b, c))
->>> for element in ds.as_numpy_iterator():
-...   print(element)
-(1, 4, array([7, 8]))
-(2, 5, array([ 9, 10]))
-(3, 6, array([11, 12]))
->>>
->>> # The number of elements in the resulting dataset is the same as
->>> # the size of the smallest dataset in `datasets`.
->>> d = tf.data.Dataset.range(13, 15)  # ==> [ 13, 14 ]
->>> ds = tf.data.Dataset.zip((a, d))
->>> list(ds.as_numpy_iterator())
-[(1, 13), (2, 14)]
-```
-
-
-
-
-
-
Args
-`datasets` - -A (nested) structure of datasets. -
- - - - - - - - - - - - -
Returns
-`Dataset` - -A `Dataset`. -
- - - -

__bool__

- - - - - - -

__iter__

- - - -Creates an iterator for elements of this dataset. - -The returned iterator implements the Python Iterator protocol. - - - - - - - - - -
Returns
-A `tf.data.Iterator` for the elements of this dataset.
-
- - - - - - - - - - - - -
Raises
-`RuntimeError` - -If not inside of tf.function and not executing eagerly. -
- - - -
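-
-A minimal sketch: Python's built-in `iter()` dispatches to `__iter__`, so
-elements can be pulled one at a time in eager mode.
-
-```python
-import tensorflow as tf
-
-dataset = tf.data.Dataset.range(3)
-iterator = iter(dataset)  # calls Dataset.__iter__
-print(next(iterator).numpy())  # 0
-print(next(iterator).numpy())  # 1
-```
-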

__len__

- - - -Returns the length of the dataset if it is known and finite. - -This method requires that you are running in eager mode, and that the -length of the dataset is known and non-infinite. When the length may be -unknown or infinite, or if you are running in graph mode, use -`tf.data.Dataset.cardinality` instead. - - - - - - - - - -
Returns
-An integer representing the length of the dataset. -
- - - - - - - - - - - - -
Raises
-`RuntimeError` - -If the dataset length is unknown or infinite, or if eager -execution is not enabled. -
- - - -
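-
-A minimal sketch contrasting `len()` with `cardinality` for the infinite
-case.
-
-```python
-import tensorflow as tf
-
-dataset = tf.data.Dataset.range(42)
-print(len(dataset))  # 42
-
-# `len()` would raise for an infinite dataset; use `cardinality` instead.
-repeated = dataset.repeat()
-print((repeated.cardinality() == tf.data.INFINITE_CARDINALITY).numpy())  # True
-```
-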

__nonzero__

- - - - - - - - diff --git a/g3doc/api_docs/python/expression_impl/parquet/calculate_parquet_values.md b/g3doc/api_docs/python/expression_impl/parquet/calculate_parquet_values.md deleted file mode 100644 index 393b3b9..0000000 --- a/g3doc/api_docs/python/expression_impl/parquet/calculate_parquet_values.md +++ /dev/null @@ -1,96 +0,0 @@ -description: Calculates expressions and returns a parquet dataset. - -
# expression_impl.parquet.calculate_parquet_values

Calculates expressions and returns a parquet dataset.

Args:

- `expressions`: A list of expressions to calculate.
- `root_exp`: The root placeholder expression to use as the feed dict.
- `filenames`: A list of parquet files.
- `batch_size`: The number of messages to batch.
- `options`: calculate options.

Returns:

- A parquet dataset.

diff --git a/g3doc/api_docs/python/expression_impl/parquet/create_expression_from_parquet_file.md b/g3doc/api_docs/python/expression_impl/parquet/create_expression_from_parquet_file.md deleted file mode 100644 index d8f3f9e..0000000 --- a/g3doc/api_docs/python/expression_impl/parquet/create_expression_from_parquet_file.md +++ /dev/null @@ -1,65 +0,0 @@

description: Creates a placeholder expression from a parquet file.
# expression_impl.parquet.create_expression_from_parquet_file

Creates a placeholder expression from a parquet file.

Args:

- `filenames`: A list of parquet files.

Returns:

- A PlaceholderRootExpression that should be used as the root of an expression graph.
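A minimal end-to-end sketch tying the two parquet functions together. The file name and the `foo.bar` column are hypothetical placeholders for your own parquet schema:

```python
from struct2tensor import path
from struct2tensor import expression_impl

filenames = ["data.parquet"]  # hypothetical parquet file
root = expression_impl.parquet.create_expression_from_parquet_file(filenames)
bar = root.get_descendant_or_error(path.Path(["foo", "bar"]))

# Stream the file, 16 root messages per batch; each dataset element holds
# the calculated prensor values for the requested expressions.
dataset = expression_impl.parquet.calculate_parquet_values(
    [bar], root, filenames, batch_size=16)
```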
diff --git a/g3doc/api_docs/python/expression_impl/placeholder.md b/g3doc/api_docs/python/expression_impl/placeholder.md deleted file mode 100644 index a39e524..0000000 --- a/g3doc/api_docs/python/expression_impl/placeholder.md +++ /dev/null @@ -1,50 +0,0 @@

description: Placeholder expression.
# Module: expression_impl.placeholder

Placeholder expression.

A placeholder expression represents prensor nodes; however, a prensor is not
needed until calculate is called. This allows the user to apply expression
queries to a placeholder expression before having an actual prensor object.
When calculate is called on a placeholder expression (or a descendant of a
placeholder expression), the feed_dict will need to be passed in. Then calculate
will bind the prensor with the appropriate placeholder expression.

#### Sample usage:

```
placeholder_exp = placeholder.create_expression_from_schema(schema)
new_exp = expression_queries(placeholder_exp, ..)
result = calculate.calculate_values([new_exp],
                                    feed_dict={placeholder_exp: pren})
# placeholder_exp requires a feed_dict to be passed in when calculating
```

## Functions

[`create_expression_from_schema(...)`](../expression_impl/placeholder/create_expression_from_schema.md): Creates a placeholder expression from a parquet schema.

[`get_placeholder_paths_from_graph(...)`](../expression_impl/placeholder/get_placeholder_paths_from_graph.md): Gets all placeholder paths from an expression graph.

diff --git a/g3doc/api_docs/python/expression_impl/placeholder/create_expression_from_schema.md b/g3doc/api_docs/python/expression_impl/placeholder/create_expression_from_schema.md deleted file mode 100644 index bf03a42..0000000 --- a/g3doc/api_docs/python/expression_impl/placeholder/create_expression_from_schema.md +++ /dev/null @@ -1,66 +0,0 @@

description: Creates a placeholder expression from a parquet schema.
# expression_impl.placeholder.create_expression_from_schema

Creates a placeholder expression from a parquet schema.

Args:

- `schema`: The schema that describes the prensor tree that this placeholder represents.

Returns:

- A PlaceholderRootExpression that should be used as the root of an expression graph.
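A short sketch of the round trip, mirroring the module's sample usage above. Here `schema` and `pren` are assumed to exist (a parquet schema and a prensor with matching structure), and the `foo.bar` path is hypothetical:

```python
from struct2tensor import calculate
from struct2tensor import path
from struct2tensor import expression_impl

placeholder_exp = expression_impl.placeholder.create_expression_from_schema(schema)
new_exp = placeholder_exp.project([path.Path(["foo", "bar"])])

# The placeholder is only bound to a real prensor at calculation time.
[result] = calculate.calculate_values(
    [new_exp], feed_dict={placeholder_exp: pren})
```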
diff --git a/g3doc/api_docs/python/expression_impl/placeholder/get_placeholder_paths_from_graph.md b/g3doc/api_docs/python/expression_impl/placeholder/get_placeholder_paths_from_graph.md deleted file mode 100644 index a7f008e..0000000 --- a/g3doc/api_docs/python/expression_impl/placeholder/get_placeholder_paths_from_graph.md +++ /dev/null @@ -1,66 +0,0 @@

description: Gets all placeholder paths from an expression graph.
# expression_impl.placeholder.get_placeholder_paths_from_graph

Gets all placeholder paths from an expression graph.

This finds all leaf placeholder expressions in an expression graph, and gets
the paths of these expressions.

Args:

- `graph`: expression graph.

Returns:

- a list of paths of placeholder expressions.

diff --git a/g3doc/api_docs/python/expression_impl/project.md b/g3doc/api_docs/python/expression_impl/project.md deleted file mode 100644 index 8fe4c1a..0000000 --- a/g3doc/api_docs/python/expression_impl/project.md +++ /dev/null @@ -1,44 +0,0 @@

description: project selects a subtree of an expression.
# Module: expression_impl.project

project selects a subtree of an expression.

project is often used right before calculating the value.

#### Example:

```
expr = ...
new_expr = project.project(expr, [path.Path(["foo","bar"]),
                                  path.Path(["x", "y"])])
[prensor_result] = calculate.calculate_prensors([new_expr])
```

prensor_result now has two paths, "foo.bar" and "x.y".

## Functions

[`project(...)`](../expression_impl/project/project.md): select a subtree.

diff --git a/g3doc/api_docs/python/expression_impl/project/project.md b/g3doc/api_docs/python/expression_impl/project/project.md deleted file mode 100644 index f8ac3e2..0000000 --- a/g3doc/api_docs/python/expression_impl/project/project.md +++ /dev/null @@ -1,75 +0,0 @@

description: select a subtree.

# expression_impl.project.project

select a subtree.

Paths not selected are removed.
Paths that are selected are "known", such that if calculate_prensors is
called, they will be in the result.

Args:

- `expr`: the original expression.
- `paths`: the paths to include.

Returns:

- A projected expression.

diff --git a/g3doc/api_docs/python/expression_impl/promote.md b/g3doc/api_docs/python/expression_impl/promote.md deleted file mode 100644 index 7053010..0000000 --- a/g3doc/api_docs/python/expression_impl/promote.md +++ /dev/null @@ -1,118 +0,0 @@

description: Promote an expression to be a child of its grandparent.
# Module: expression_impl.promote

Promote an expression to be a child of its grandparent.

Promote is part of the standard flattening of data, promote_and_broadcast,
which takes structured data and flattens it. By directly accessing promote,
one can perform simpler operations.

For example, suppose an expr represents:

```
+
|
+-session*   (stars indicate repeated)
  |
  +-event*
    |
    +-val*-int64

session: {
  event: {
    val: 111
  }
  event: {
    val: 121
    val: 122
  }
}

session: {
  event: {
    val: 10
    val: 7
  }
  event: {
    val: 1
  }
}
```

```
promote.promote(expr, path.Path(["session", "event", "val"]), "nval")
```

produces:

```
+
|
+-session*   (stars indicate repeated)
  |
  +-event*
  | |
  | +-val*-int64
  |
  +-nval*-int64

session: {
  event: {
    val: 111
  }
  event: {
    val: 121
    val: 122
  }
  nval: 111
  nval: 121
  nval: 122
}

session: {
  event: {
    val: 10
    val: 7
  }
  event: {
    val: 1
  }
  nval: 10
  nval: 7
  nval: 1
}
```

## Classes

[`class PromoteChildExpression`](../expression_impl/promote/PromoteChildExpression.md): The root of the promoted sub tree.

[`class PromoteExpression`](../expression_impl/promote/PromoteExpression.md): A promoted leaf.

## Functions

[`promote(...)`](../expression_impl/promote/promote.md): Promote a path to be a child of its grandparent, and give it a name.

[`promote_anonymous(...)`](../expression_impl/promote/promote_anonymous.md): Promote a path to be a new anonymous child of its grandparent.
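In code, the query above looks like the following sketch, where `expr` is assumed to be an expression over the session/event/val tree shown earlier:

```python
from struct2tensor import path
from struct2tensor import expression_impl

# Promote session.event.val so it also appears as the sibling field
# session.nval, as illustrated in the trees above.
new_root = expression_impl.promote.promote(
    expr, path.Path(["session", "event", "val"]), "nval")
```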
diff --git a/g3doc/api_docs/python/expression_impl/promote/PromoteChildExpression.md b/g3doc/api_docs/python/expression_impl/promote/PromoteChildExpression.md deleted file mode 100644 index 540f290..0000000 --- a/g3doc/api_docs/python/expression_impl/promote/PromoteChildExpression.md +++ /dev/null @@ -1,1044 +0,0 @@

description: The root of the promoted sub tree.

# expression_impl.promote.PromoteChildExpression

The root of the promoted sub tree.

Args:

- `is_repeated`: if the expression is repeated.
- `my_type`: the DType of a field, or None for an internal node.
- `schema_feature`: the local schema (StructDomain information should not be present).

Attributes:

- `is_leaf`: True iff the node tensor is a LeafNodeTensor.
- `is_repeated`: True iff the same parent value can have multiple children values.
- `schema_feature`: Return the schema of the field.
- `type`: dtype of the expression, or None if not a leaf expression.
## Methods

### apply

### apply_schema

### broadcast

Broadcasts the existing field at source_path to the sibling_field.

### calculate

Calculates the node tensor of the expression.

The node tensor must be a function of the properties of the expression
and the node tensors of the expressions from get_source_expressions().

If is_leaf, then calculate must return a LeafNodeTensor.
Otherwise, it must return a ChildNodeTensor or RootNodeTensor.

If calculate_is_identity is true, then this must return source_tensors[0].

Sometimes, for operations such as parsing the proto, calculate will return
additional information. For example, calculate() for the root of the
proto expression also parses out the tensors required to calculate the
tensors of the children. This is why destinations are required.

For a reference use, see calculate_value_slowly(...) below.

Args:

- `source_tensors`: The node tensors of the expressions in get_source_expressions().
- `destinations`: The expressions that will use the output of this method.
- `options`: Options for the calculation.
- `side_info`: An optional prensor that is used to bind to a placeholder expression.

Returns:

- A NodeTensor representing the output of this expression.

### calculation_equal

self.calculate is equal to another expression.calculate.

Given the same source node tensors, self.calculate(...) and
expression.calculate(...) will have the same result.

Note that this does not check that the source expressions of the two
expressions are the same. Therefore, two operations can have the same
calculation, but not the same output, because their sources are different.
For example, if a.calculation_is_identity() is True and
b.calculation_is_identity() is True, then a.calculation_equal(b) is True.
However, unless a and b have the same source, the expressions themselves are
not equal.

Args:

- `expression`: The expression to compare to.

### calculation_is_identity

True iff the self.calculate is the identity.

There is exactly one source, and the output of self.calculate(...) is the
node tensor of this source.

### cogroup_by_index

Creates a cogroup of left_name and right_name at new_field_name.

### create_has_field

Creates a field that is the presence of the source path.

### create_proto_index

Creates a proto index field as a direct child of the current root.

The proto index maps each root element to the original batch index.
For example: [0, 2] means the first element came from the first proto
in the original input tensor and the second element came from the third
proto. The created field is always "dense" -- it has the same valency as
the current root.

Args:

- `field_name`: the name of the field to be created.

Returns:

- An Expression object representing the result of the operation.

### create_size_field

Creates a field that is the size of the source path.

### get_child

Gets a named child.

### get_child_or_error

Gets a named child.

### get_descendant

Finds the descendant at the path.

### get_descendant_or_error

Finds the descendant at the path.

### get_known_children

### get_known_descendants

Gets a mapping from known paths to subexpressions.

The difference between this and get_descendants in Prensor is that
all paths in a Prensor are realized, thus all known. But an Expression's
descendants might not all be known at the point this method is called,
because an expression may have an infinite number of children.

Returns:

- A mapping from paths (relative to the root of the subexpression) to expressions.

### get_paths_with_schema

Extract only paths that contain schema information.

### get_schema

Returns a schema for the entire tree.

Args:

- `create_schema_features`: If True, schema features are added for all
  children and a schema entry is created if not available on the child. If
  False, features are left off of the returned schema if there is no
  schema_feature on the child.

### get_source_expressions

Gets the sources of this expression.

The node tensors of the source expressions must be sufficient to
calculate the node tensor of this expression
(see calculate and calculate_value_slowly).

Returns:

- The sources of this expression.

### known_field_names

Returns known field names of the expression.

Known field names of a parsed proto correspond to the fields declared in
the message. Examples of "unknown" fields are extensions and explicit casts
in an any field. The only way to know if an unknown field "(foo.bar)" is
present in an expression expr is to call (expr["(foo.bar)"] is not None).

Notice that simply accessing a field does not make it "known". However,
setting a field (or setting a descendant of a field) will make it known.

project(...) returns an expression where the known field names are the only
field names. In general, if you want to depend upon known_field_names
(e.g., if you want to compile an expression), then the best approach is to
project() the expression first.

Returns:

- An immutable set of field names.

### map_field_values

Map a primitive field to create a new primitive field.

Note: the dtype argument is added since the v1 API.

Args:

- `source_path`: the origin path.
- `operator`: an element-wise operator that takes a 1-dimensional vector.
- `dtype`: the type of the output.
- `new_field_name`: the name of a new sibling of source_path.

Returns:

- the resulting root expression.
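A hedged sketch of `map_field_values`; the `foo.bar` path and the new field name are hypothetical:

```python
import tensorflow as tf
from struct2tensor import path

# Derive an int64 sibling foo.bar_plus_one from foo.bar, element-wise.
# `expr` is assumed to be an expression whose foo.bar leaf is int64.
new_root = expr.map_field_values(
    path.Path(["foo", "bar"]),
    lambda values: tf.add(values, 1),
    dtype=tf.int64,
    new_field_name="bar_plus_one")
```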
### map_ragged_tensors

Maps a set of primitive fields of a message to a new field.

Unlike map_field_values, this operation allows you to some degree reshape
the field. For instance, you can take two optional fields and create a
repeated field, or perform a reduce_sum on the last dimension of a repeated
field and create an optional field. The key constraint is that the operator
must return a sparse tensor of the correct dimension: i.e., a
2D sparse tensor if is_repeated is true, or a 1D sparse tensor if
is_repeated is false. Moreover, the first dimension of the sparse tensor
must be equal to the first dimension of the input tensor.

Args:

- `parent_path`: the parent of the input and output fields.
- `source_fields`: the nonempty list of names of the source fields.
- `operator`: an operator that takes len(source_fields) sparse tensors and
  returns a sparse tensor of the appropriate shape.
- `is_repeated`: whether the output is repeated.
- `dtype`: the dtype of the result.
- `new_field_name`: the name of the resulting field.

Returns:

- A new query.

### map_sparse_tensors

Maps a set of primitive fields of a message to a new field. The constraints,
arguments, and return value are the same as for map_ragged_tensors above.

### project

Constrains the paths to those listed.

### promote

Promotes source_path to be a field new_field_name in its grandparent.

### promote_and_broadcast

### reroot

Returns a new list of protocol buffers available at new_root.

### schema_string

Returns a schema for the expression.

E.g.

    repeated root:
      optional int32 foo
      optional bar:
        optional string baz
      optional int64 bak

Note that unknown fields and subexpressions are not displayed.

Args:

- `limit`: if present, limit the recursion.

Returns:

- A string, describing (a part of) the schema.

### slice

Creates a slice copy of source_path at new_field_path.

Note that if begin or end is negative, it is considered relative to
the size of the array. e.g., slice(..., begin=-1) will get the last
element of every array.

Args:

- `source_path`: the source of the slice.
- `new_field_name`: the new field that is generated.
- `begin`: the beginning of the slice (inclusive).
- `end`: the end of the slice (exclusive).

Returns:

- An Expression object representing the result of the operation.

### truncate

Creates a truncated copy of source_path at new_field_path.

### __eq__

if hash(expr1) == hash(expr2): then expr1 == expr2.

Do not override this method.

Args:

- `expr`: The expression to check equality against.

Returns:

- Boolean of equality of two expressions.
diff --git a/g3doc/api_docs/python/expression_impl/promote/PromoteExpression.md b/g3doc/api_docs/python/expression_impl/promote/PromoteExpression.md deleted file mode 100644 index b1f8e32..0000000 --- a/g3doc/api_docs/python/expression_impl/promote/PromoteExpression.md +++ /dev/null @@ -1,1041 +0,0 @@

description: A promoted leaf.

# expression_impl.promote.PromoteExpression

A promoted leaf.

Args:

- `is_repeated`: if the expression is repeated.
- `my_type`: the DType of the field.
- `schema_feature`: schema information about the field.

Attributes:

- `is_leaf`: True iff the node tensor is a LeafNodeTensor.
- `is_repeated`: True iff the same parent value can have multiple children values.
- `schema_feature`: Return the schema of the field.
- `type`: dtype of the expression, or None if not a leaf expression.
## Methods

The methods below are the standard Expression methods; their full
documentation is identical to that shown for PromoteChildExpression above.

### apply

### apply_schema

### broadcast

Broadcasts the existing field at source_path to the sibling_field.

### calculate

Calculates the node tensor of the expression.

### calculation_equal

self.calculate is equal to another expression.calculate.

### calculation_is_identity

True iff the self.calculate is the identity.

### cogroup_by_index

Creates a cogroup of left_name and right_name at new_field_name.

### create_has_field

Creates a field that is the presence of the source path.

### create_proto_index

Creates a proto index field as a direct child of the current root.

### create_size_field

Creates a field that is the size of the source path.

### get_child

Gets a named child.

### get_child_or_error

Gets a named child.

### get_descendant

Finds the descendant at the path.

### get_descendant_or_error

Finds the descendant at the path.

### get_known_children

### get_known_descendants

Gets a mapping from known paths to subexpressions.

### get_paths_with_schema

Extract only paths that contain schema information.

### get_schema

Returns a schema for the entire tree.

### get_source_expressions

Gets the sources of this expression.

### known_field_names

Returns known field names of the expression.

### map_field_values

Map a primitive field to create a new primitive field.

### map_ragged_tensors

Maps a set of primitive fields of a message to a new field.

### map_sparse_tensors

Maps a set of primitive fields of a message to a new field.

### project

Constrains the paths to those listed.

### promote

Promotes source_path to be a field new_field_name in its grandparent.

### promote_and_broadcast

### reroot

Returns a new list of protocol buffers available at new_root.

### schema_string

Returns a schema for the expression.

### slice

Creates a slice copy of source_path at new_field_path.

### truncate

Creates a truncated copy of source_path at new_field_path.

### __eq__

if hash(expr1) == hash(expr2): then expr1 == expr2. Do not override this method.
diff --git a/g3doc/api_docs/python/expression_impl/promote/promote.md b/g3doc/api_docs/python/expression_impl/promote/promote.md deleted file mode 100644 index 833064c..0000000 --- a/g3doc/api_docs/python/expression_impl/promote/promote.md +++ /dev/null @@ -1,35 +0,0 @@

description: Promote a path to be a child of its grandparent, and give it a name.

# expression_impl.promote.promote

Promote a path to be a child of its grandparent, and give it a name.

diff --git a/g3doc/api_docs/python/expression_impl/promote/promote_anonymous.md b/g3doc/api_docs/python/expression_impl/promote/promote_anonymous.md deleted file mode 100644 index 06c3490..0000000 --- a/g3doc/api_docs/python/expression_impl/promote/promote_anonymous.md +++ /dev/null @@ -1,34 +0,0 @@

description: Promote a path to be a new anonymous child of its grandparent.
# expression_impl.promote.promote_anonymous

Promote a path to be a new anonymous child of its grandparent.

diff --git a/g3doc/api_docs/python/expression_impl/promote_and_broadcast.md b/g3doc/api_docs/python/expression_impl/promote_and_broadcast.md deleted file mode 100644 index ec7c211..0000000 --- a/g3doc/api_docs/python/expression_impl/promote_and_broadcast.md +++ /dev/null @@ -1,126 +0,0 @@

description: promote_and_broadcast a set of nodes.
# Module: expression_impl.promote_and_broadcast

promote_and_broadcast a set of nodes.

For example, suppose an expr represents:

```
+
|
+-session*   (stars indicate repeated)
  |
  +-event*
  | |
  | +-val*-int64
  |
  +-user_info?  (question mark indicates optional)
    |
    +-age? int64

session: {
  event: {
    val: 1
  }
  event: {
    val: 4
    val: 5
  }
  user_info: {
    age: 25
  }
}

session: {
  event: {
    val: 7
  }
  event: {
    val: 8
    val: 9
  }
  user_info: {
    age: 20
  }
}
```

```
promote_and_broadcast.promote_and_broadcast(
    path.Path(["event"]), {"nage": path.Path(["user_info", "age"])})
```

creates:

```
+
|
+-session*   (stars indicate repeated)
  |
  +-event*
  | |
  | +-val*-int64
  | |
  | +-nage*-int64
  |
  +-user_info?  (question mark indicates optional)
    |
    +-age? int64

session: {
  event: {
    nage: 25
    val: 1
  }
  event: {
    nage: 25
    val: 4
    val: 5
  }
  user_info: {
    age: 25
  }
}

session: {
  event: {
    nage: 20
    val: 7
  }
  event: {
    nage: 20
    val: 8
    val: 9
  }
  user_info: {
    age: 20
  }
}
```

## Functions

[`promote_and_broadcast(...)`](../expression_impl/promote_and_broadcast/promote_and_broadcast.md): Promote and broadcast a set of paths to a particular location.

[`promote_and_broadcast_anonymous(...)`](../expression_impl/promote_and_broadcast/promote_and_broadcast_anonymous.md): Promotes then broadcasts the origin until its parent is new_parent.

diff --git a/g3doc/api_docs/python/expression_impl/promote_and_broadcast/promote_and_broadcast.md b/g3doc/api_docs/python/expression_impl/promote_and_broadcast/promote_and_broadcast.md deleted file mode 100644 index 212fe1f..0000000 --- a/g3doc/api_docs/python/expression_impl/promote_and_broadcast/promote_and_broadcast.md +++ /dev/null @@ -1,81 +0,0 @@

description: Promote and broadcast a set of paths to a particular location.
# expression_impl.promote_and_broadcast.promote_and_broadcast

Promote and broadcast a set of paths to a particular location.

Args:

- `root`: the original expression.
- `path_dictionary`: a map from destination fields to origin paths.
- `dest_path_parent`: the path whose children the promoted and broadcast fields become.

Returns:

- A new expression, where all the origin paths are promoted and broadcast until they are children of dest_path_parent.
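For example, the module-level query shown earlier can be written against a root expression like so (a sketch; `root` is assumed to be an expression over the session tree above):

```python
from struct2tensor import path
from struct2tensor import expression_impl

# Promote and broadcast user_info.age so each event gets a copy named nage.
new_root = expression_impl.promote_and_broadcast.promote_and_broadcast(
    root,
    {"nage": path.Path(["user_info", "age"])},
    path.Path(["event"]))
```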
diff --git a/g3doc/api_docs/python/expression_impl/promote_and_broadcast/promote_and_broadcast_anonymous.md b/g3doc/api_docs/python/expression_impl/promote_and_broadcast/promote_and_broadcast_anonymous.md deleted file mode 100644 index 1781907..0000000 --- a/g3doc/api_docs/python/expression_impl/promote_and_broadcast/promote_and_broadcast_anonymous.md +++ /dev/null @@ -1,35 +0,0 @@

description: Promotes then broadcasts the origin until its parent is new_parent.
# expression_impl.promote_and_broadcast.promote_and_broadcast_anonymous

Promotes then broadcasts the origin until its parent is new_parent.

diff --git a/g3doc/api_docs/python/expression_impl/proto.md b/g3doc/api_docs/python/expression_impl/proto.md deleted file mode 100644 index 9d0016b..0000000 --- a/g3doc/api_docs/python/expression_impl/proto.md +++ /dev/null @@ -1,51 +0,0 @@

description: Expressions to parse a proto.
# Module: expression_impl.proto

Expressions to parse a proto.

These expressions return values with more information than standard node values.
Specifically, each node calculates additional tensors that are used as inputs
for its children.

## Classes

[`class DescriptorPool`](../expression_impl/proto/DescriptorPool.md): A collection of protobufs dynamically constructed by descriptor protos.

[`class FileDescriptorSet`](../expression_impl/proto/FileDescriptorSet.md): A ProtocolMessage

## Functions

[`create_expression_from_file_descriptor_set(...)`](../expression_impl/proto/create_expression_from_file_descriptor_set.md): Create an expression from a 1D tensor of serialized protos.

[`create_expression_from_proto(...)`](../expression_impl/proto/create_expression_from_proto.md): Create an expression from a 1D tensor of serialized protos.

[`create_transformed_field(...)`](../expression_impl/proto/create_transformed_field.md): Create an expression that transforms serialized proto tensors.

[`is_proto_expression(...)`](../expression_impl/proto/is_proto_expression.md): Returns true if an expression is a ProtoExpression.

## Type Aliases

[`ProtoExpression`](../expression_impl/proto/ProtoExpression.md)

[`TransformFn`](../expression_impl/proto/TransformFn.md)

diff --git a/g3doc/api_docs/python/expression_impl/proto/DescriptorPool.md b/g3doc/api_docs/python/expression_impl/proto/DescriptorPool.md deleted file mode 100644 index f5e36b6..0000000 --- a/g3doc/api_docs/python/expression_impl/proto/DescriptorPool.md +++ /dev/null @@ -1,813 +0,0 @@

description: A collection of protobufs dynamically constructed by descriptor protos.
# expression_impl.proto.DescriptorPool

A collection of protobufs dynamically constructed by descriptor protos.

Args:

- `descriptor_db`: A secondary source of file descriptors.

## Methods

### Add

Adds the FileDescriptorProto and its types to this pool.

Args:

- file_desc_proto (FileDescriptorProto): The file descriptor to add.

### AddDescriptor

### AddEnumDescriptor

### AddExtensionDescriptor

### AddFileDescriptor

### AddSerializedFile

Adds the FileDescriptorProto and its types to this pool.

Args:

- serialized_file_desc_proto (bytes): A bytes string, serialization of the
  `FileDescriptorProto` to add.

### AddServiceDescriptor

### FindAllExtensions

Gets all the known extensions of a given message.

Extensions have to be registered to this pool by build related
`Add` or `AddExtensionDescriptor`.

Args:

- message_descriptor (Descriptor): Descriptor of the extended message.

Returns:

- list[FieldDescriptor]: Field descriptors describing the extensions.

### FindEnumTypeByName

Loads the named enum descriptor from the pool.

Args:

- full_name (str): The full name of the enum descriptor to load.

Returns:

- `EnumDescriptor`: The enum descriptor for the named type.

Raises:

- `KeyError`: if the enum cannot be found in the pool.

### FindExtensionByName

Loads the named extension descriptor from the pool.

Args:

- full_name (str): The full name of the extension descriptor to load.

Returns:

- `FieldDescriptor`: The field descriptor for the named extension.

Raises:

- `KeyError`: if the extension cannot be found in the pool.

### FindExtensionByNumber

Gets the extension of the specified message with the specified number.

Extensions have to be registered to this pool by calling `Add` or
`AddExtensionDescriptor`.

Args:

- message_descriptor (Descriptor): descriptor of the extended message.
- number (int): Number of the extension field.

Returns:

- `FieldDescriptor`: The descriptor for the extension.

Raises:

- `KeyError`: when no extension with the given number is known for the
  specified message.

### FindFieldByName

Loads the named field descriptor from the pool.

Args:

- full_name (str): The full name of the field descriptor to load.

Returns:

- `FieldDescriptor`: The field descriptor for the named field.

Raises:

- `KeyError`: if the field cannot be found in the pool.

### FindFileByName

Gets a FileDescriptor by file name.

Args:

- file_name (str): The path to the file to get a descriptor for.

Returns:

- `FileDescriptor`: The descriptor for the named file.

Raises:

- `KeyError`: if the file cannot be found in the pool.

### FindFileContainingSymbol

Gets the FileDescriptor for the file containing the specified symbol.

Args:

- symbol (str): The name of the symbol to search for.

Returns:

- `FileDescriptor`: Descriptor for the file that contains the specified symbol.

Raises:

- `KeyError`: if the file cannot be found in the pool.

### FindMessageTypeByName

Loads the named descriptor from the pool.

Args:

- full_name (str): The full name of the descriptor to load.

Returns:

- `Descriptor`: The descriptor for the named type.

Raises:

- `KeyError`: if the message cannot be found in the pool.

### FindMethodByName

Loads the named service method descriptor from the pool.

Args:

- full_name (str): The full name of the method descriptor to load.

Returns:

- `MethodDescriptor`: The method descriptor for the service method.

Raises:

- `KeyError`: if the method cannot be found in the pool.

### FindOneofByName

Loads the named oneof descriptor from the pool.

Args:

- full_name (str): The full name of the oneof descriptor to load.

Returns:

- `OneofDescriptor`: The oneof descriptor for the named oneof.

Raises:

- `KeyError`: if the oneof cannot be found in the pool.

### FindServiceByName

Loads the named service descriptor from the pool.

Args:

- full_name (str): The full name of the service descriptor to load.

Returns:

- `ServiceDescriptor`: The service descriptor for the named service.

Raises:

- `KeyError`: if the service cannot be found in the pool.
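For orientation, a small sketch of descriptor lookup using the standard protobuf API against the default pool:

```python
from google.protobuf import descriptor_pool

pool = descriptor_pool.Default()
# Look up a well-known message type by its fully qualified name.
desc = pool.FindMessageTypeByName("google.protobuf.FileDescriptorProto")
print(desc.full_name)  # -> google.protobuf.FileDescriptorProto
```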
diff --git a/g3doc/api_docs/python/expression_impl/proto/FileDescriptorSet.md b/g3doc/api_docs/python/expression_impl/proto/FileDescriptorSet.md deleted file mode 100644 index 373d76c..0000000 --- a/g3doc/api_docs/python/expression_impl/proto/FileDescriptorSet.md +++ /dev/null @@ -1,41 +0,0 @@

description: A ProtocolMessage

# expression_impl.proto.FileDescriptorSet

A ProtocolMessage

Attributes:

- `file`: `repeated FileDescriptorProto file`

diff --git a/g3doc/api_docs/python/expression_impl/proto/ProtoExpression.md b/g3doc/api_docs/python/expression_impl/proto/ProtoExpression.md deleted file mode 100644 index acd848c..0000000 --- a/g3doc/api_docs/python/expression_impl/proto/ProtoExpression.md +++ /dev/null @@ -1,25 +0,0 @@
# expression_impl.proto.ProtoExpression

This symbol is a **type alias**.

diff --git a/g3doc/api_docs/python/expression_impl/proto/TransformFn.md b/g3doc/api_docs/python/expression_impl/proto/TransformFn.md deleted file mode 100644 index 236fbc0..0000000 --- a/g3doc/api_docs/python/expression_impl/proto/TransformFn.md +++ /dev/null @@ -1,25 +0,0 @@
# expression_impl.proto.TransformFn

This symbol is a **type alias**.

diff --git a/g3doc/api_docs/python/expression_impl/proto/create_expression_from_file_descriptor_set.md b/g3doc/api_docs/python/expression_impl/proto/create_expression_from_file_descriptor_set.md deleted file mode 100644 index f0c2413..0000000 --- a/g3doc/api_docs/python/expression_impl/proto/create_expression_from_file_descriptor_set.md +++ /dev/null @@ -1,93 +0,0 @@

description: Create an expression from a 1D tensor of serialized protos.
# expression_impl.proto.create_expression_from_file_descriptor_set

Create an expression from a 1D tensor of serialized protos.

Args:

- `tensor_of_protos`: 1D tensor of serialized protos.
- `proto_name`: fully qualified name (e.g. "some.package.SomeProto") of the proto in `tensor_of_protos`.
- `file_descriptor_set`: The FileDescriptorSet proto containing `proto_name`'s and all its dependencies' FileDescriptorProto. Note that if file1 imports file2, then file2's FileDescriptorProto must precede file1's in file_descriptor_set.file.
- `message_format`: Indicates the format of the protocol buffer: is one of 'text' or 'binary'.

Returns:

- An expression.
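A sketch of assembling the `file_descriptor_set` argument. Here `MyProto` and `serialized_protos` are hypothetical; note the documented ordering constraint that dependencies must precede their importers:

```python
from google.protobuf import descriptor_pb2
from struct2tensor import expression_impl

fds = descriptor_pb2.FileDescriptorSet()
# Copy the FileDescriptorProto for MyProto's file; any imported files'
# FileDescriptorProtos must be added to fds.file before this one.
MyProto.DESCRIPTOR.file.CopyToProto(fds.file.add())

expr = expression_impl.proto.create_expression_from_file_descriptor_set(
    serialized_protos, "some.package.MyProto", fds)
```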
diff --git a/g3doc/api_docs/python/expression_impl/proto/create_expression_from_proto.md b/g3doc/api_docs/python/expression_impl/proto/create_expression_from_proto.md deleted file mode 100644 index 4a2744c..0000000 --- a/g3doc/api_docs/python/expression_impl/proto/create_expression_from_proto.md +++ /dev/null @@ -1,81 +0,0 @@

description: Create an expression from a 1D tensor of serialized protos.
# expression_impl.proto.create_expression_from_proto

Create an expression from a 1D tensor of serialized protos.

Args:

- `tensor_of_protos`: 1D tensor of serialized protos.
- `desc`: a descriptor of protos in tensor of protos.
- `message_format`: Indicates the format of the protocol buffer: is one of 'text' or 'binary'.

Returns:

- An expression.
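A minimal sketch, assuming a hypothetical generated message class `MyProto` and a Python list `my_protos` of its instances:

```python
import tensorflow as tf
from struct2tensor import expression_impl

# Serialize the messages into a 1-D string tensor, then build an expression
# rooted at the proto's message descriptor.
serialized = tf.constant([p.SerializeToString() for p in my_protos])
expr = expression_impl.proto.create_expression_from_proto(
    serialized, MyProto.DESCRIPTOR, message_format="binary")
```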
diff --git a/g3doc/api_docs/python/expression_impl/proto/create_transformed_field.md b/g3doc/api_docs/python/expression_impl/proto/create_transformed_field.md deleted file mode 100644 index 7555fb9..0000000 --- a/g3doc/api_docs/python/expression_impl/proto/create_transformed_field.md +++ /dev/null @@ -1,126 +0,0 @@

description: Create an expression that transforms serialized proto tensors.
# expression_impl.proto.create_transformed_field

Create an expression that transforms serialized proto tensors.

The transform_fn argument should take the form:

    def transform_fn(parent_indices, values):
      ...
      return (transformed_parent_indices, transformed_values)

#### Given:

- parent_indices: an int64 vector of non-decreasing parent message indices.
- values: a string vector of serialized protos having the same shape as `parent_indices`.

`transform_fn` must return new parent indices and serialized values encoding
the same proto message as the passed in `values`. These two vectors must
have the same size, but it need not be the same as the input arguments.

Args:

- `expr`: a source expression containing `source_path`.
- `source_path`: the path to the field to transform.
- `dest_field`: the name of the newly created field. This field will be a sibling of the field identified by `source_path`.
- `transform_fn`: a callable that accepts parent_indices and serialized proto values and returns a possibly modified parent_indices and values. Note that when CalculateOptions.use_string_view is set, transform_fn should not have any stateful side-effecting uses of serialized proto inputs. Doing so could cause segfaults, as the backing string tensor lifetime is not guaranteed when the side-effecting operations are run.

Returns:

- An expression.

Raises:

- `ValueError`: if the source path is not a proto message field.
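A sketch of the contract; the `event` path and `event_copy` name are hypothetical, and an identity transform is the simplest function satisfying it:

```python
from struct2tensor import path
from struct2tensor import expression_impl

def transform_fn(parent_indices, values):
  # Any filtering or reordering works, as long as the two outputs keep
  # equal length and parent_indices stay non-decreasing.
  return parent_indices, values

new_expr = expression_impl.proto.create_transformed_field(
    expr, path.Path(["event"]), "event_copy", transform_fn)
```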
diff --git a/g3doc/api_docs/python/expression_impl/proto/is_proto_expression.md b/g3doc/api_docs/python/expression_impl/proto/is_proto_expression.md deleted file mode 100644 index 60cc1d3..0000000 --- a/g3doc/api_docs/python/expression_impl/proto/is_proto_expression.md +++ /dev/null @@ -1,33 +0,0 @@

description: Returns true if an expression is a ProtoExpression.
# expression_impl.proto.is_proto_expression

Returns true if an expression is a ProtoExpression.

diff --git a/g3doc/api_docs/python/expression_impl/reroot.md b/g3doc/api_docs/python/expression_impl/reroot.md deleted file mode 100644 index e1bf645..0000000 --- a/g3doc/api_docs/python/expression_impl/reroot.md +++ /dev/null @@ -1,35 +0,0 @@

description: Reroot to a subtree, maintaining an input proto index.
# Module: expression_impl.reroot

Reroot to a subtree, maintaining an input proto index.

reroot is similar to get_descendant_or_error. However, this method allows
you to call create_proto_index(...) later on, which gives you a reference to the
original proto.

## Functions

[`create_proto_index_field(...)`](../expression_impl/reroot/create_proto_index_field.md)

[`reroot(...)`](../expression_impl/reroot/reroot.md): Reroot to a new path, maintaining an input proto index.

diff --git a/g3doc/api_docs/python/expression_impl/reroot/create_proto_index_field.md b/g3doc/api_docs/python/expression_impl/reroot/create_proto_index_field.md deleted file mode 100644 index 0067663..0000000 --- a/g3doc/api_docs/python/expression_impl/reroot/create_proto_index_field.md +++ /dev/null @@ -1,32 +0,0 @@
# expression_impl.reroot.create_proto_index_field

diff --git a/g3doc/api_docs/python/expression_impl/reroot/reroot.md b/g3doc/api_docs/python/expression_impl/reroot/reroot.md deleted file mode 100644 index 7b99d93..0000000 --- a/g3doc/api_docs/python/expression_impl/reroot/reroot.md +++ /dev/null @@ -1,74 +0,0 @@

description: Reroot to a new path, maintaining an input proto index.
# expression_impl.reroot.reroot

Reroot to a new path, maintaining an input proto index.

Similar to root.get_descendant_or_error(source_path); however, this
method retains the ability to get a map to the original index.

Args:

- `root`: the original root.
- `source_path`: the path to the new root.

Returns:

- the new root.
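A short sketch (the `event` path and field name are hypothetical):

```python
from struct2tensor import path
from struct2tensor import expression_impl

# Make each event a root, then record which input proto it came from.
event_root = expression_impl.reroot.reroot(root, path.Path(["event"]))
indexed = event_root.create_proto_index("proto_index")
```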
diff --git a/g3doc/api_docs/python/expression_impl/size.md b/g3doc/api_docs/python/expression_impl/size.md deleted file mode 100644 index 94ec2da..0000000 --- a/g3doc/api_docs/python/expression_impl/size.md +++ /dev/null @@ -1,53 +0,0 @@

description: Functions for creating new size or has expression.
# Module: expression_impl.size

Functions for creating new size or has expression.

Given a field "foo.bar",

```
root = size(expr, path.Path(["foo","bar"]), "bar_size")
```

creates a new expression root that has an optional field "foo.bar_size", which
is always present, and contains the number of bar in a particular foo.

```
root_2 = has(expr, path.Path(["foo","bar"]), "bar_has")
```

creates a new expression root_2 that has an optional field "foo.bar_has", which
is always present, and is true if there are one or more bar in foo.

## Classes

[`class SizeExpression`](../expression_impl/size/SizeExpression.md): Size of the given expression.

## Functions

[`has(...)`](../expression_impl/size/has.md): Get the has of a field as a new sibling field.

[`size(...)`](../expression_impl/size/size.md): Get the size of a field as a new sibling field.

[`size_anonymous(...)`](../expression_impl/size/size_anonymous.md): Calculate the size of a field, and store it as an anonymous sibling.
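The two queries above, as a runnable sketch against an expression `expr` that is assumed to contain a repeated `foo.bar` field:

```python
from struct2tensor import path
from struct2tensor import expression_impl

# foo.bar_size: count of bar values per foo; foo.bar_has: presence flag.
root = expression_impl.size.size(expr, path.Path(["foo", "bar"]), "bar_size")
root_2 = expression_impl.size.has(expr, path.Path(["foo", "bar"]), "bar_has")
```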
diff --git a/g3doc/api_docs/python/expression_impl/size/SizeExpression.md b/g3doc/api_docs/python/expression_impl/size/SizeExpression.md deleted file mode 100644 index 3adb5e0..0000000 --- a/g3doc/api_docs/python/expression_impl/size/SizeExpression.md +++ /dev/null @@ -1,1043 +0,0 @@

description: Size of the given expression.

# expression_impl.size.SizeExpression

Size of the given expression.

SizeExpression is intended to be a sibling of origin.
origin_parent should be the parent of origin.

Args:

- `is_repeated`: if the expression is repeated.
- `my_type`: the DType of the field.
- `schema_feature`: schema information about the field.

Attributes:

- `is_leaf`: True iff the node tensor is a LeafNodeTensor.
- `is_repeated`: True iff the same parent value can have multiple children values.
- `schema_feature`: Return the schema of the field.
- `type`: dtype of the expression, or None if not a leaf expression.
- - - -## Methods - -

apply

- - - - - - -

apply_schema

- - - - - - -

broadcast

- - - -Broadcasts the existing field at source_path to the sibling_field. - - -

calculate

- -View source - - - -Calculates the node tensor of the expression. - -The node tensor must be a function of the properties of the expression -and the node tensors of the expressions from get_source_expressions(). - -If is_leaf, then calculate must return a LeafNodeTensor. -Otherwise, it must return a ChildNodeTensor or RootNodeTensor. - -If calculate_is_identity is true, then this must return source_tensors[0]. - -Sometimes, for operations such as parsing the proto, calculate will return -additional information. For example, calculate() for the root of the -proto expression also parses out the tensors required to calculate the -tensors of the children. This is why destinations are required. - -For a reference use, see calculate_value_slowly(...) below. - - - - - - - - - - - - - - - - - - - -
Args
-`source_tensors` - -The node tensors of the expressions in -get_source_expressions(). -
-`destinations` - -The expressions that will use the output of this method. -
-`options` - -Options for the calculation. -
-`side_info` - -An optional prensor that is used to bind to a placeholder -expression. -
- - - - - - - - - - - -
Returns
-A NodeTensor representing the output of this expression. -
- - - -

calculation_equal

- -View source - - - -self.calculate is equal to another expression.calculate. - -Given the same source node tensors, self.calculate(...) and -expression.calculate(...) will have the same result. - -Note that this does not check that the source expressions of the two -expressions are the same. Therefore, two operations can have the same -calculation, but not the same output, because their sources are different. -For example, if a.calculation_is_identity() is True and -b.calculation_is_identity() is True, then a.calculation_equal(b) is True. -However, unless a and b have the same source, the expressions themselves are -not equal. - - - - - - - - - - -
Args
-`expression` - -The expression to compare to. -
- - - -

calculation_is_identity

- -View source - - - -True iff the self.calculate is the identity. - -There is exactly one source, and the output of self.calculate(...) is the -node tensor of this source. - -

cogroup_by_index

- - - -Creates a cogroup of left_name and right_name at new_field_name. - - -

create_has_field

- - - -Creates a field that is the presence of the source path. - - -

create_proto_index

- - - -Creates a proto index field as a direct child of the current root. - -The proto index maps each root element to the original batch index. -For example: [0, 2] means the first element came from the first proto -in the original input tensor and the second element came from the third -proto. The created field is always "dense" -- it has the same valency as -the current root. - - - - - - - - - - -
Args
-`field_name` - -the name of the field to be created. -
- - - - - - - - - - - -
Returns
-An Expression object representing the result of the operation. -
- - - -

create_size_field

- - - -Creates a field that is the size of the source path. - - -

get_child

- - - -Gets a named child. - - -

get_child_or_error

- - - -Gets a named child. - - -

get_descendant

- - - -Finds the descendant at the path. - - -

get_descendant_or_error

- - - -Finds the descendant at the path. - - -

get_known_children

- - - - - - -

get_known_descendants

- - - -Gets a mapping from known paths to subexpressions. - -The difference between this and get_descendants in Prensor is that -all paths in a Prensor are realized, thus all known. But an Expression's -descendants might not all be known at the point this method is called, -because an expression may have an infinite number of children. - - - - - - - - - -
Returns
-A mapping from paths (relative to the root of the subexpression) to -expressions. -
- - - -

get_paths_with_schema

- - - -Extract only paths that contain schema information. - - -

get_schema

- - - -Returns a schema for the entire tree. - - - - - - - - - - - -
Args
-`create_schema_features` - -If True, schema features are added for all -children and a schema entry is created if not available on the child. If -False, features are left off of the returned schema if there is no -schema_feature on the child. -
- - - -

get_source_expressions

- -View source - - - -Gets the sources of this expression. - -The node tensors of the source expressions must be sufficient to -calculate the node tensor of this expression -(see calculate and calculate_value_slowly). - - - - - - - - - -
Returns
-The sources of this expression. -
- - - -

known_field_names

- - - -Returns known field names of the expression. - - -Known field names of a parsed proto correspond to the fields declared in -the message. Examples of "unknown" fields are extensions and explicit casts -in an any field. The only way to know if an unknown field "(foo.bar)" is -present in an expression expr is to call (expr["(foo.bar)"] is not None). - -Notice that simply accessing a field does not make it "known". However, -setting a field (or setting a descendant of a field) will make it known. - -project(...) returns an expression where the known field names are the only -field names. In general, if you want to depend upon known_field_names -(e.g., if you want to compile a expression), then the best approach is to -project() the expression first. - - - - - - - - - -
Returns
-An immutable set of field names. -
- - - -

map_field_values

- - - -Map a primitive field to create a new primitive field. - -Note: the dtype argument is added since the v1 API. - - - - - - - - - - - - - - - - - - - -
Args
-`source_path` - -the origin path. -
-`operator` - -an element-wise operator that takes a 1-dimensional vector. -
-`dtype` - -the type of the output. -
-`new_field_name` - -the name of a new sibling of source_path. -
- - - - - - - - - - - -
Returns
-the resulting root expression. -
- - - -
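> Editor's note: a sketch of the call shape for `map_field_values`, assuming `expr` already exists and has an int64 leaf at the hypothetical path "event.value".

```python
import tensorflow as tf

# Element-wise: the operator receives a 1-D tensor of all leaf values and
# must return a tensor of the same length.
new_root = expr.map_field_values(
    "event.value",              # source_path (hypothetical)
    lambda values: values * 2,  # element-wise operator on a 1-D vector
    tf.int64,                   # dtype of the output leaf
    "value_doubled")            # new sibling field of source_path
```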

map_ragged_tensors

- - - -Maps a set of primitive fields of a message to a new field. - -Unlike map_field_values, this operation allows you to some degree reshape -the field. For instance, you can take two optional fields and create a -repeated field, or perform a reduce_sum on the last dimension of a repeated -field and create an optional field. The key constraint is that the operator -must return a sparse tensor of the correct dimension: i.e., a -2D sparse tensor if is_repeated is true, or a 1D sparse tensor if -is_repeated is false. Moreover, the first dimension of the sparse tensor -must be equal to the first dimension of the input tensor. - - - - - - - - - - - - - - - - - - - - - - - - - -
Args
-`parent_path` - -the parent of the input and output fields. -
-`source_fields` - -the nonempty list of names of the source fields. -
-`operator` - -an operator that takes len(source_fields) sparse tensors and -returns a sparse tensor of the appropriate shape. -
-`is_repeated` - -whether the output is repeated. -
-`dtype` - -the dtype of the result. -
-`new_field_name` - -the name of the resulting field. -
- - - - - - - - - - - -
Returns
-A new query. -
- - - -
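> Editor's note: a sketch of the reshaping described above, merging two optional sibling fields into one repeated field. The paths are hypothetical, and the operator contract (ragged in, ragged out, first dimension preserved) is assumed from the text.

```python
import tensorflow as tf

# Concatenating along axis 1 turns two [batch, 0..1] ragged fields into one
# [batch, 0..2] repeated field; the batch (first) dimension is preserved.
new_root = expr.map_ragged_tensors(
    parent_path="event",  # hypothetical parent of inputs and output
    source_fields=["min_value", "max_value"],
    operator=lambda lo, hi: tf.concat([lo, hi], axis=1),
    is_repeated=True,
    dtype=tf.int64,
    new_field_name="value_range")
```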

map_sparse_tensors

- - - -Maps a set of primitive fields of a message to a new field. - -Unlike map_field_values, this operation allows you to some degree reshape -the field. For instance, you can take two optional fields and create a -repeated field, or perform a reduce_sum on the last dimension of a repeated -field and create an optional field. The key constraint is that the operator -must return a sparse tensor of the correct dimension: i.e., a -2D sparse tensor if is_repeated is true, or a 1D sparse tensor if -is_repeated is false. Moreover, the first dimension of the sparse tensor -must be equal to the first dimension of the input tensor. - - - - - - - - - - - - - - - - - - - - - - - - - -
Args
-`parent_path` - -the parent of the input and output fields. -
-`source_fields` - -the nonempty list of names of the source fields. -
-`operator` - -an operator that takes len(source_fields) sparse tensors and -returns a sparse tensor of the appropriate shape. -
-`is_repeated` - -whether the output is repeated. -
-`dtype` - -the dtype of the result. -
-`new_field_name` - -the name of the resulting field. -
- - - - - - - - - - - -
Returns
-A new query. -
- - - -

project

- - - -Constrains the paths to those listed. - - -

promote

- - - -Promotes source_path to be a field new_field_name in its grandparent. - - -
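> Editor's note: a one-line sketch of `promote` with a hypothetical path; each grandparent gains a field collecting the promoted values of its grandchildren.

```python
# "event.action.doc_id" values become a repeated "doc_ids" field on each event.
new_root = expr.promote("event.action.doc_id", "doc_ids")
```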

promote_and_broadcast

- - - - - - -

reroot

- - - -Returns a new list of protocol buffers available at new_root. - - -

schema_string

- - - -Returns a schema for the expression. - -E.g. - -repeated root: - optional int32 foo - optional bar: - optional string baz - optional int64 bak - -Note that unknown fields and subexpressions are not displayed. - - - - - - - - - - -
Args
-`limit` - -if present, limit the recursion. -
- - - - - - - - - - - -
Returns
-A string, describing (a part of) the schema. -
- - - -

slice

- - - -Creates a slice copy of source_path at new_field_path. - -Note that if begin or end is negative, it is considered relative to -the size of the array. e.g., slice(...,begin=-1) will get the last -element of every array. - - - - - - - - - - - - - - - - - - - -
Args
-`source_path` - -the source of the slice. -
-`new_field_name` - -the new field that is generated. -
-`begin` - -the beginning of the slice (inclusive). -
-`end` - -the end of the slice (exclusive). -
- - - - - - - - - - - -
Returns
-An Expression object representing the result of the operation. -
- - - -
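> Editor's note: a sketch of both slice forms documented above, using a hypothetical repeated field.

```python
# Keep the second and third "event.action" of every event.
middle = expr.slice("event.action", "actions_1_2", begin=1, end=3)

# Negative indices count from the end: the last action of every event.
last = expr.slice("event.action", "last_action", begin=-1)
```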

truncate

- - - -Creates a truncated copy of source_path at new_field_path. - - -

__eq__

-if hash(expr1) == hash(expr2): then expr1 == expr2.
-
-Do not override this method.
-
-Args:
-  expr: The expression to check equality against.
-
Returns
-Boolean of equality of two expressions -
- - - - - diff --git a/g3doc/api_docs/python/expression_impl/size/has.md b/g3doc/api_docs/python/expression_impl/size/has.md deleted file mode 100644 index 3dc9d63..0000000 --- a/g3doc/api_docs/python/expression_impl/size/has.md +++ /dev/null @@ -1,80 +0,0 @@ -description: Get the has of a field as a new sibling field. - -
- - -
- -# expression_impl.size.has - - - - - - - - - -Get the has of a field as a new sibling field. - - - - - - - - - - - - - - - - - - - - - - - -
-`root` - -the original expression. -
-`source_path` - -the source path to measure. Cannot be root. -
-`new_field_name` - -the name of the sibling field. -
- - - - - - - - - - - -
-The new expression. -
- diff --git a/g3doc/api_docs/python/expression_impl/size/size.md b/g3doc/api_docs/python/expression_impl/size/size.md deleted file mode 100644 index 819c9e3..0000000 --- a/g3doc/api_docs/python/expression_impl/size/size.md +++ /dev/null @@ -1,80 +0,0 @@ -description: Get the size of a field as a new sibling field. - -
- - -
- -# expression_impl.size.size - - - - - - - - - -Get the size of a field as a new sibling field. - - - - - - - - - - - - - - - - - - - - - - - -
-`root` - -the original expression. -
-`source_path` - -the source path to measure. Cannot be root. -
-`new_field_name` - -the name of the sibling field. -
- - - - - - - - - - - -
-The new expression. -
- diff --git a/g3doc/api_docs/python/expression_impl/size/size_anonymous.md b/g3doc/api_docs/python/expression_impl/size/size_anonymous.md deleted file mode 100644 index 792d371..0000000 --- a/g3doc/api_docs/python/expression_impl/size/size_anonymous.md +++ /dev/null @@ -1,72 +0,0 @@ -description: Calculate the size of a field, and store it as an anonymous sibling. - -
- - -
- -# expression_impl.size.size_anonymous - - - - - - - - - -Calculate the size of a field, and store it as an anonymous sibling. - - - - - - - - - - - - - - - - - - - - -
-`root` - -the original expression. -
-`source_path` - -the source path to measure. Cannot be root. -
- - - - - - - - - - - -
-The new expression and the new field as a pair. -
- diff --git a/g3doc/api_docs/python/expression_impl/slice_expression.md b/g3doc/api_docs/python/expression_impl/slice_expression.md deleted file mode 100644 index 9ba697d..0000000 --- a/g3doc/api_docs/python/expression_impl/slice_expression.md +++ /dev/null @@ -1,135 +0,0 @@ -description: Implementation of slice. - -
-
-# Module: expression_impl.slice_expression
-
-Implementation of slice.
-
-The slice operation is meant to replicate the slicing of a list in python.
-
-Slicing a list in python is done by specifying a beginning and ending.
-The resulting list consists of all elements in the range.
-
-#### For example:
-
-```
->>> x = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
->>> print(x[2:5])  # all elements between index 2 inclusive and index 5 exclusive
-['c', 'd', 'e']
->>> print(x[2:])  # all elements between index 2 and the end.
-['c', 'd', 'e', 'f', 'g']
->>> print(x[:4])  # all elements between the beginning and index 4 (exclusive).
-['a', 'b', 'c', 'd']
->>> print(x[-3:-1])  # all elements starting three from the end
->>> # until one from the end (exclusive).
-['e', 'f']
->>> print(x[-3:6])  # all elements starting three from the end
->>> # until index 6 exclusive.
-['e', 'f', 'g']
-```
-
-Python also allows a step size, iterating over the elements
-(e.g. x[2:6:2] == ['c', 'e'], giving you every other element).
-This is not implemented.
-
-A prensor can be considered to be interleaved lists and dictionaries.
-E.g.:
-
-```
-my_expression = [{
-  "foo":[
-    {"bar":[
-      {"baz":["a","b","c","d"]},
-      {"baz":["d","e","f"]}
-    ]},
-    {"bar":[
-      {"baz":["g","h","i"]},
-      {"baz":["j","k","l"]},
-      {"baz":["m"]}
-    ]}
-  ]
-}]
-```
-
-```
-result_1 = slice_expression.slice_expression(
-    my_expression, "foo.bar", "new_bar", begin=1, end=3)
-
-result_1 = [{
-  "foo":[
-    {"bar":[
-      {"baz":["a","b","c","d"]},
-      {"baz":["d","e","f"]}
-     ],
-     "new_bar":[
-      {"baz":["d","e","f"]}
-     ]},
-    {"bar":[
-      {"baz":["g","h","i"]},
-      {"baz":["j","k","l"]},
-      {"baz":["m"]}
-     ],
-     "new_bar":[
-      {"baz":["j","k","l"]},
-      {"baz":["m"]}
-     ]}
-  ]
-}]
-```
-
-```
-result_2 = slice_expression.slice_expression(
-    my_expression, "foo.bar.baz", "new_baz", begin=1, end=3)
-
-result_2 = [{
-  "foo":[
-    {"bar":[
-      {"baz":["a","b","c","d"], "new_baz":["b","c"]},
-      {"baz":["d","e","f"], "new_baz":["e","f"]}
-    ]},
-    {"bar":[
-      {"baz":["g","h","i"], "new_baz":["h","i"]},
-      {"baz":["j","k","l"], "new_baz":["k","l"]},
-      {"baz":["m"]}
-    ]}
-  ]
-}]
-```
-
-## Functions
-
-[`slice_expression(...)`](../expression_impl/slice_expression/slice_expression.md): Creates a new subtree with a sliced expression.
-
-## Type Aliases
-
-[`IndexValue`](../expression_impl/slice_expression/IndexValue.md)
-
diff --git a/g3doc/api_docs/python/expression_impl/slice_expression/IndexValue.md b/g3doc/api_docs/python/expression_impl/slice_expression/IndexValue.md
deleted file mode 100644
index 952d132..0000000
--- a/g3doc/api_docs/python/expression_impl/slice_expression/IndexValue.md
+++ /dev/null
@@ -1,25 +0,0 @@
-
- - -
- -# expression_impl.slice_expression.IndexValue - - -This symbol is a **type alias**. - - - -#### Source: - - - - - - diff --git a/g3doc/api_docs/python/expression_impl/slice_expression/slice_expression.md b/g3doc/api_docs/python/expression_impl/slice_expression/slice_expression.md deleted file mode 100644 index 9de0d23..0000000 --- a/g3doc/api_docs/python/expression_impl/slice_expression/slice_expression.md +++ /dev/null @@ -1,98 +0,0 @@ -description: Creates a new subtree with a sliced expression. - -
- - -
- -# expression_impl.slice_expression.slice_expression - - - - - - - - - -Creates a new subtree with a sliced expression. - - - - - - - -This follows the pattern of python slice() method. -See module-level comments for examples. - - - - - - - - - - - - - - - - - - - - - - -
-`expr` - -the original root expression -
-`p` - -the path to the source to be sliced. -
-`new_field_name` - -the name of the new subtree. -
-`begin` - -beginning index -
-`end` - -end index. -
- - - - - - - - - - - -
-A new root expression. -
- diff --git a/g3doc/api_docs/python/s2t.md b/g3doc/api_docs/python/s2t.md deleted file mode 100644 index 6f5e8c6..0000000 --- a/g3doc/api_docs/python/s2t.md +++ /dev/null @@ -1,78 +0,0 @@ -description: Import core names for struct2tensor. - -
- - -
-
-# Module: s2t
-
-Import core names for struct2tensor.
-
-## Classes
-
-[`class ChildNodeTensor`](./s2t/ChildNodeTensor.md): The value of an intermediate node.
-
-[`class Expression`](./s2t/Expression.md): An expression represents the calculation of a prensor object.
-
-[`class LeafNodeTensor`](./s2t/LeafNodeTensor.md): The value of a leaf node.
-
-[`class Path`](./s2t/Path.md): A representation of a path in the expression.
-
-[`class Prensor`](./s2t/Prensor.md): An expression of NodeTensor objects.
-
-[`class RootNodeTensor`](./s2t/RootNodeTensor.md): The value of the root.
-
-## Functions
-
-[`calculate_prensors(...)`](./s2t/calculate_prensors.md): Gets the prensor value of the expressions.
-
-[`calculate_prensors_with_graph(...)`](./s2t/calculate_prensors_with_graph.md): Gets the prensor value of the expressions and the graph used.
-
-[`calculate_prensors_with_source_paths(...)`](./s2t/calculate_prensors_with_source_paths.md): Returns a list of prensor trees, and proto summaries.
-
-[`create_expression_from_file_descriptor_set(...)`](./s2t/create_expression_from_file_descriptor_set.md): Create an expression from a 1D tensor of serialized protos.
-
-[`create_expression_from_prensor(...)`](./s2t/create_expression_from_prensor.md): Gets an expression representing the prensor.
-
-[`create_expression_from_proto(...)`](./s2t/create_expression_from_proto.md): Create an expression from a 1D tensor of serialized protos.
-
-[`create_path(...)`](./s2t/create_path.md): Create a path from an object.
-
-[`create_prensor_from_descendant_nodes(...)`](./s2t/create_prensor_from_descendant_nodes.md): Create a prensor from a map of paths to NodeTensor.
-
-[`create_prensor_from_root_and_children(...)`](./s2t/create_prensor_from_root_and_children.md)
-
-[`get_default_options(...)`](./s2t/get_default_options.md): Get the default options.
-
-[`get_options_with_minimal_checks(...)`](./s2t/get_options_with_minimal_checks.md): Options for calculation with minimal runtime checks.
-
-[`get_ragged_tensor(...)`](./s2t/get_ragged_tensor.md): Get a ragged tensor for a path. (deprecated)
-
-[`get_ragged_tensors(...)`](./s2t/get_ragged_tensors.md): Gets ragged tensors for all the leaves of the prensor expression. (deprecated)
-
-[`get_sparse_tensor(...)`](./s2t/get_sparse_tensor.md): Gets a sparse tensor for path p. (deprecated)
-
-[`get_sparse_tensors(...)`](./s2t/get_sparse_tensors.md): Gets sparse tensors for all the leaves of the prensor expression. (deprecated)
-
-## Type Aliases
-
-[`NodeTensor`](./s2t/NodeTensor.md)
-
-[`Step`](./s2t/Step.md)
-
diff --git a/g3doc/api_docs/python/s2t/ChildNodeTensor.md b/g3doc/api_docs/python/s2t/ChildNodeTensor.md
deleted file mode 100644
index cc3d3c1..0000000
--- a/g3doc/api_docs/python/s2t/ChildNodeTensor.md
+++ /dev/null
@@ -1,139 +0,0 @@
-description: The value of an intermediate node.
-
- - - - -
- -# s2t.ChildNodeTensor - - - - - - - - - -The value of an intermediate node. - - - - - - - - - - - - - - - - - - - - -
-`parent_index` - -a 1-D int64 tensor where parent_index[i] represents the -parent index of the ith child. -
-`is_repeated` - -a bool indicating if there can be more than one child per -parent. -
- - - - - - - - - - - - - - - - - - - - -
-`is_repeated` - - -
-`parent_index` - - -
-`size` - -Returns the size, as if this was the root prensor. -
- - - -## Methods - -

get_positional_index

- -View source - - - -Gets the positional index for this ChildNodeTensor. - -The positional index tells us which index of the parent an element is. - -For example, with the following parent indices: [0, 0, 2] -we would have positional index: -[ - 0, # The 0th element of the 0th parent. - 1, # The 1st element of the 0th parent. - 0 # The 0th element of the 2nd parent. -]. - -For more information, view ops/run_length_before_op.cc - -This is the same for Leaf NodeTensors. - - - - - - - - - -
Returns
-A tensor of positional indices. -
- - - - - diff --git a/g3doc/api_docs/python/s2t/Expression.md b/g3doc/api_docs/python/s2t/Expression.md deleted file mode 100644 index dd32541..0000000 --- a/g3doc/api_docs/python/s2t/Expression.md +++ /dev/null @@ -1,1102 +0,0 @@ -description: An expression represents the calculation of a prensor object. - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -# s2t.Expression - - - - - - - - - -An expression represents the calculation of a prensor object. - - - - - - - - - - - - - - - - - - - - - - - -
-`is_repeated` - -if the expression is repeated. -
-`my_type` - -the DType of a field, or None for an internal node. -
-`schema_feature` - -the local schema (StructDomain information should not be -present). -
- - - - - - - - - - - - - - - - - - - - - - - -
-`is_leaf` - -True iff the node tensor is a LeafNodeTensor. -
-`is_repeated` - -True iff the same parent value can have multiple children values. -
-`schema_feature` - -Return the schema of the field. -
-`type` - -dtype of the expression, or None if not a leaf expression. -
- - - -## Methods - -

apply

- -View source - - - - - - -

apply_schema

- -View source - - - - - - -

broadcast

- -View source - - - -Broadcasts the existing field at source_path to the sibling_field. - - -

calculate

- -View source - - - -Calculates the node tensor of the expression. - -The node tensor must be a function of the properties of the expression -and the node tensors of the expressions from get_source_expressions(). - -If is_leaf, then calculate must return a LeafNodeTensor. -Otherwise, it must return a ChildNodeTensor or RootNodeTensor. - -If calculate_is_identity is true, then this must return source_tensors[0]. - -Sometimes, for operations such as parsing the proto, calculate will return -additional information. For example, calculate() for the root of the -proto expression also parses out the tensors required to calculate the -tensors of the children. This is why destinations are required. - -For a reference use, see calculate_value_slowly(...) below. - - - - - - - - - - - - - - - - - - - -
Args
-`source_tensors` - -The node tensors of the expressions in -get_source_expressions(). -
-`destinations` - -The expressions that will use the output of this method. -
-`options` - -Options for the calculation. -
-`side_info` - -An optional prensor that is used to bind to a placeholder -expression. -
- - - - - - - - - - - -
Returns
-A NodeTensor representing the output of this expression. -
- - - -

calculation_equal

- -View source - - - -self.calculate is equal to another expression.calculate. - -Given the same source node tensors, self.calculate(...) and -expression.calculate(...) will have the same result. - -Note that this does not check that the source expressions of the two -expressions are the same. Therefore, two operations can have the same -calculation, but not the same output, because their sources are different. -For example, if a.calculation_is_identity() is True and -b.calculation_is_identity() is True, then a.calculation_equal(b) is True. -However, unless a and b have the same source, the expressions themselves are -not equal. - - - - - - - - - - -
Args
-`expression` - -The expression to compare to. -
- - - -

calculation_is_identity

- -View source - - - -True iff the self.calculate is the identity. - -There is exactly one source, and the output of self.calculate(...) is the -node tensor of this source. - -

cogroup_by_index

- -View source - - - -Creates a cogroup of left_name and right_name at new_field_name. - - -

create_has_field

- -View source - - - -Creates a field that is the presence of the source path. - - -

create_proto_index

- -View source - - - -Creates a proto index field as a direct child of the current root. - -The proto index maps each root element to the original batch index. -For example: [0, 2] means the first element came from the first proto -in the original input tensor and the second element came from the third -proto. The created field is always "dense" -- it has the same valency as -the current root. - - - - - - - - - - -
Args
-`field_name` - -the name of the field to be created. -
- - - - - - - - - - - -
Returns
-An Expression object representing the result of the operation. -
- - - -

create_size_field

- -View source - - - -Creates a field that is the size of the source path. - - -

get_child

- -View source - - - -Gets a named child. - - -

get_child_or_error

- -View source - - - -Gets a named child. - - -

get_descendant

- -View source - - - -Finds the descendant at the path. - - -

get_descendant_or_error

- -View source - - - -Finds the descendant at the path. - - -

get_known_children

- -View source - - - - - - -

get_known_descendants

- -View source - - - -Gets a mapping from known paths to subexpressions. - -The difference between this and get_descendants in Prensor is that -all paths in a Prensor are realized, thus all known. But an Expression's -descendants might not all be known at the point this method is called, -because an expression may have an infinite number of children. - - - - - - - - - -
Returns
-A mapping from paths (relative to the root of the subexpression) to -expressions. -
- - - -

get_paths_with_schema

- -View source - - - -Extract only paths that contain schema information. - - -

get_schema

- -View source - - - -Returns a schema for the entire tree. - - - - - - - - - - - -
Args
-`create_schema_features` - -If True, schema features are added for all -children and a schema entry is created if not available on the child. If -False, features are left off of the returned schema if there is no -schema_feature on the child. -
- - - -

get_source_expressions

- -View source - - - -Gets the sources of this expression. - -The node tensors of the source expressions must be sufficient to -calculate the node tensor of this expression -(see calculate and calculate_value_slowly). - - - - - - - - - -
Returns
-The sources of this expression. -
- - - -

known_field_names

-View source
-
-Returns known field names of the expression.
-
-Known field names of a parsed proto correspond to the fields declared in
-the message. Examples of "unknown" fields are extensions and explicit casts
-in an any field. The only way to know if an unknown field "(foo.bar)" is
-present in an expression expr is to call (expr["(foo.bar)"] is not None).
-
-Notice that simply accessing a field does not make it "known". However,
-setting a field (or setting a descendant of a field) will make it known.
-
-project(...) returns an expression where the known field names are the only
-field names. In general, if you want to depend upon known_field_names
-(e.g., if you want to compile an expression), then the best approach is to
-project() the expression first.
-
Returns
-An immutable set of field names. -
- - - -

map_field_values

- -View source - - - -Map a primitive field to create a new primitive field. - -Note: the dtype argument is added since the v1 API. - - - - - - - - - - - - - - - - - - - -
Args
-`source_path` - -the origin path. -
-`operator` - -an element-wise operator that takes a 1-dimensional vector. -
-`dtype` - -the type of the output. -
-`new_field_name` - -the name of a new sibling of source_path. -
- - - - - - - - - - - -
Returns
-the resulting root expression. -
- - - -

map_ragged_tensors

- -View source - - - -Maps a set of primitive fields of a message to a new field. - -Unlike map_field_values, this operation allows you to some degree reshape -the field. For instance, you can take two optional fields and create a -repeated field, or perform a reduce_sum on the last dimension of a repeated -field and create an optional field. The key constraint is that the operator -must return a sparse tensor of the correct dimension: i.e., a -2D sparse tensor if is_repeated is true, or a 1D sparse tensor if -is_repeated is false. Moreover, the first dimension of the sparse tensor -must be equal to the first dimension of the input tensor. - - - - - - - - - - - - - - - - - - - - - - - - - -
Args
-`parent_path` - -the parent of the input and output fields. -
-`source_fields` - -the nonempty list of names of the source fields. -
-`operator` - -an operator that takes len(source_fields) sparse tensors and -returns a sparse tensor of the appropriate shape. -
-`is_repeated` - -whether the output is repeated. -
-`dtype` - -the dtype of the result. -
-`new_field_name` - -the name of the resulting field. -
- - - - - - - - - - - -
Returns
-A new query. -
- - - -

map_sparse_tensors

- -View source - - - -Maps a set of primitive fields of a message to a new field. - -Unlike map_field_values, this operation allows you to some degree reshape -the field. For instance, you can take two optional fields and create a -repeated field, or perform a reduce_sum on the last dimension of a repeated -field and create an optional field. The key constraint is that the operator -must return a sparse tensor of the correct dimension: i.e., a -2D sparse tensor if is_repeated is true, or a 1D sparse tensor if -is_repeated is false. Moreover, the first dimension of the sparse tensor -must be equal to the first dimension of the input tensor. - - - - - - - - - - - - - - - - - - - - - - - - - -
Args
-`parent_path` - -the parent of the input and output fields. -
-`source_fields` - -the nonempty list of names of the source fields. -
-`operator` - -an operator that takes len(source_fields) sparse tensors and -returns a sparse tensor of the appropriate shape. -
-`is_repeated` - -whether the output is repeated. -
-`dtype` - -the dtype of the result. -
-`new_field_name` - -the name of the resulting field. -
- - - - - - - - - - - -
Returns
-A new query. -
- - - -

project

- -View source - - - -Constrains the paths to those listed. - - -

promote

- -View source - - - -Promotes source_path to be a field new_field_name in its grandparent. - - -

promote_and_broadcast

- -View source - - - - - - -

reroot

- -View source - - - -Returns a new list of protocol buffers available at new_root. - - -

schema_string

- -View source - - - -Returns a schema for the expression. - -E.g. - -repeated root: - optional int32 foo - optional bar: - optional string baz - optional int64 bak - -Note that unknown fields and subexpressions are not displayed. - - - - - - - - - - -
Args
-`limit` - -if present, limit the recursion. -
- - - - - - - - - - - -
Returns
-A string, describing (a part of) the schema. -
- - - -

slice

- -View source - - - -Creates a slice copy of source_path at new_field_path. - -Note that if begin or end is negative, it is considered relative to -the size of the array. e.g., slice(...,begin=-1) will get the last -element of every array. - - - - - - - - - - - - - - - - - - - -
Args
-`source_path` - -the source of the slice. -
-`new_field_name` - -the new field that is generated. -
-`begin` - -the beginning of the slice (inclusive). -
-`end` - -the end of the slice (exclusive). -
- - - - - - - - - - - -
Returns
-An Expression object representing the result of the operation. -
- - - -

truncate

- -View source - - - -Creates a truncated copy of source_path at new_field_path. - - -

__eq__

-View source
-
-if hash(expr1) == hash(expr2): then expr1 == expr2.
-
-Do not override this method.
-
-Args:
-  expr: The expression to check equality against.
-
Returns
-Boolean of equality of two expressions -
- - - - - diff --git a/g3doc/api_docs/python/s2t/LeafNodeTensor.md b/g3doc/api_docs/python/s2t/LeafNodeTensor.md deleted file mode 100644 index eb7bbb0..0000000 --- a/g3doc/api_docs/python/s2t/LeafNodeTensor.md +++ /dev/null @@ -1,147 +0,0 @@ -description: The value of a leaf node. - -
- - - - -
- -# s2t.LeafNodeTensor - - - - - - - - - -The value of a leaf node. - - - - - - - - - - - - - - - - - - - - - - - -
-`parent_index` - -a 1-D int64 tensor where parent_index[i] represents the -parent index of values[i] -
-`values` - -a 1-D tensor of equal length to parent_index. -
-`is_repeated` - -a bool indicating if there can be more than one child per -parent. -
- - - - - - - - - - - - - - - - - - - - -
-`is_repeated` - - -
-`parent_index` - - -
-`values` - - -
- - - -## Methods - -

get_positional_index

- -View source - - - -Gets the positional index for this LeafNodeTensor. - -The positional index tells us which index of the parent an element is. - -For example, with the following parent indices: [0, 0, 2] -we would have positional index: -[ - 0, # The 0th element of the 0th parent. - 1, # The 1st element of the 0th parent. - 0 # The 0th element of the 2nd parent. -]. - -For more information, view ops/run_length_before_op.cc - -This is the same for Child NodeTensors. - - - - - - - - - -
Returns
-A tensor of positional indices. -
- - - - - diff --git a/g3doc/api_docs/python/s2t/NodeTensor.md b/g3doc/api_docs/python/s2t/NodeTensor.md deleted file mode 100644 index 471455e..0000000 --- a/g3doc/api_docs/python/s2t/NodeTensor.md +++ /dev/null @@ -1,25 +0,0 @@ -
- - -
- -# s2t.NodeTensor - - -This symbol is a **type alias**. - - - -#### Source: - - - - - - diff --git a/g3doc/api_docs/python/s2t/Path.md b/g3doc/api_docs/python/s2t/Path.md deleted file mode 100644 index 6fa08b7..0000000 --- a/g3doc/api_docs/python/s2t/Path.md +++ /dev/null @@ -1,340 +0,0 @@ -description: A representation of a path in the expression. - -
- - - - - - - - - - - - - - - - - - - -
- -# s2t.Path - - - - - - - - - -A representation of a path in the expression. - - - - - - - -Do not implement __nonzero__, __eq__, __ne__, et cetera as these are -implicitly defined by __cmp__ and __len__. - - - - - - - - - - -
-`field_list` - -a list or tuple of fields leading from one node to another. -
- - - - - - - - - - - - -
-`ValueError` - -if any field is not a valid step (see is_valid_step). -
- - - -## Methods - -

as_proto

- -View source - - - -Serialize a path as a proto. - -This fails if there are any anonymous fields. - - - - - - - - - -
Returns
-a Path proto. -
- - - -

concat

- -View source - - - - - - -

get_child

- -View source - - - -Get the child path. - - -

get_least_common_ancestor

- -View source - - - -Get the least common ancestor, the longest shared prefix. - - -

get_parent

- -View source - - - -Get the parent path. - - - - - - - - - - -
Returns
-The parent path. -
- - - - - - - - - - - - -
Raises
-`ValueError` - -If this is the root path. -
- - - -
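> Editor's note: a small sketch of the Path helpers documented on this page; the path string is hypothetical.

```python
import struct2tensor as s2t

p = s2t.create_path("event.action.doc_id")
parent = p.get_parent()                     # the path "event.action"
assert parent.is_ancestor(p)                # a parent is a prefix of its child
lca = parent.get_least_common_ancestor(p)   # also "event.action"
```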

is_ancestor

- -View source - - - -True if self is ancestor of other (i.e. a prefix). - - -

prefix

- -View source - - - - - - -

suffix

- -View source - - - - - - -

__add__

- -View source - - - - - - -

__eq__

- -View source - - - -Return self==value. - - -

__ge__

- -View source - - - -Return self>=value. - - -

__gt__

- -View source - - - -Return self>value. - - -

__le__

- -View source - - - -Return self<=value. - - -

__len__

- -View source - - - - - - -

__lt__

-View source
-
-Return self<value.
-

__ne__

-View source
-
-Return self!=value.
-
diff --git a/g3doc/api_docs/python/s2t/Prensor.md b/g3doc/api_docs/python/s2t/Prensor.md
deleted file mode 100644
index e26a368..0000000
--- a/g3doc/api_docs/python/s2t/Prensor.md
+++ /dev/null
@@ -1,384 +0,0 @@
-description: An expression of NodeTensor objects.
-
-
-# s2t.Prensor
-
-An expression of NodeTensor objects.
-
-`node` - -the NodeTensor of the root. -
-`children` - -a map from edge to subexpression. -
- - - - - - - - - - - - - - - - - -
-`is_leaf` - -True iff the node value is a LeafNodeTensor. -
-`node` - -The node of the root of the subtree. -
- - - -## Methods - -

field_names

- -View source - - - -Returns the field names of the children. - - -

get_child

- -View source - - - -Gets the child at field_name. - - -

get_child_or_error

- -View source - - - -Gets the child at field_name. - - -

get_children

- -View source - - - -A map from field name to subexpression. - - -

get_descendant

- -View source - - - -Finds the descendant at the path. - - -

get_descendant_or_error

- -View source - - - -Finds the descendant at the path. - - -

get_descendants

- -View source - - - -A map from paths to all subexpressions. - - -

get_ragged_tensor

- -View source - - - -Get a ragged tensor for a path. - -All steps are represented in the ragged tensor. - - - - - - - - - - - - - -
Args
-`p` - -the path to a leaf node in `t`. -
-`options` - -Options for calculating ragged tensors. -
- - - - - - - - - - - -
Returns
-A ragged tensor containing values of the leaf node, preserving the -structure along the path. Raises an error if the path is not found. -
- - - -

get_ragged_tensors

- -View source - - - -Gets ragged tensors for all the leaves of the prensor expression. - - - - - - - - - - - -
Args
-`options` - -Options for calculating ragged tensors. -
- - - - - - - - - - - -
Returns
-A map from paths to ragged tensors. -
- - - -

get_sparse_tensor

- -View source - - - -Gets a sparse tensor for path p. - -Note that any optional fields are not registered as dimensions, as they -can't be represented in a sparse tensor. - - - - - - - - - - - - - -
Args
-`p` - -The path to a leaf node in `t`. -
-`options` - -Currently unused. -
- - - - - - - - - - - -
Returns
-A sparse tensor containing values of the leaf node, preserving the -structure along the path. Raises an error if the path is not found. -
- - - -
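> Editor's note: a sketch putting the Prensor accessors together; the projected path is hypothetical and `expr` is assumed to exist already.

```python
import struct2tensor as s2t

# Materialize a prensor for one projected leaf, then pull tensors out of it.
[prensor] = s2t.calculate_prensors([expr.project(["event.query"])])
st = prensor.get_sparse_tensor(s2t.create_path("event.query"))
ragged = prensor.get_ragged_tensors()  # map from paths to tf.RaggedTensor
```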

get_sparse_tensors

- -View source - - - -Gets sparse tensors for all the leaves of the prensor expression. - - - - - - - - - - - -
Args
-`options` - -Currently unused. -
- - - - - - - - - - - -
Returns
-A map from paths to sparse tensors. -
- - - - - diff --git a/g3doc/api_docs/python/s2t/RootNodeTensor.md b/g3doc/api_docs/python/s2t/RootNodeTensor.md deleted file mode 100644 index c38c929..0000000 --- a/g3doc/api_docs/python/s2t/RootNodeTensor.md +++ /dev/null @@ -1,111 +0,0 @@ -description: The value of the root. - -
- - - - -
- -# s2t.RootNodeTensor - - - - - - - - - -The value of the root. - - - - - - - - - - - - - - - - - -
-`size` - -A scalar int64 tensor saying how many root objects there are. -
- - - - - - - - - - - - - - - - - -
-`is_repeated` - - -
-`size` - - -
- - - -## Methods - -

get_positional_index

-View source
-
-Gets the positional index for this RootNodeTensor.
-
-The positional index is relative to the node's parent, and thus is always
-monotonically increasing at step size 1 for a RootNodeTensor.
-
Returns
-A tensor of positional indices. -
- - - - - diff --git a/g3doc/api_docs/python/s2t/Step.md b/g3doc/api_docs/python/s2t/Step.md deleted file mode 100644 index 362604a..0000000 --- a/g3doc/api_docs/python/s2t/Step.md +++ /dev/null @@ -1,24 +0,0 @@ -
- - -
- -# s2t.Step - - -This symbol is a **type alias**. - - - -#### Source: - - - - - - diff --git a/g3doc/api_docs/python/s2t/_api_cache.json b/g3doc/api_docs/python/s2t/_api_cache.json deleted file mode 100644 index f680f60..0000000 --- a/g3doc/api_docs/python/s2t/_api_cache.json +++ /dev/null @@ -1,178 +0,0 @@ -{ - "duplicate_of": { - "s2t.Expression.__ge__": "s2t.ChildNodeTensor.__ge__", - "s2t.Expression.__gt__": "s2t.ChildNodeTensor.__gt__", - "s2t.Expression.__le__": "s2t.ChildNodeTensor.__le__", - "s2t.Expression.__lt__": "s2t.ChildNodeTensor.__lt__", - "s2t.Expression.__ne__": "s2t.ChildNodeTensor.__ne__", - "s2t.Expression.__new__": "s2t.ChildNodeTensor.__new__", - "s2t.LeafNodeTensor.__eq__": "s2t.ChildNodeTensor.__eq__", - "s2t.LeafNodeTensor.__ge__": "s2t.ChildNodeTensor.__ge__", - "s2t.LeafNodeTensor.__gt__": "s2t.ChildNodeTensor.__gt__", - "s2t.LeafNodeTensor.__le__": "s2t.ChildNodeTensor.__le__", - "s2t.LeafNodeTensor.__lt__": "s2t.ChildNodeTensor.__lt__", - "s2t.LeafNodeTensor.__ne__": "s2t.ChildNodeTensor.__ne__", - "s2t.LeafNodeTensor.__new__": "s2t.ChildNodeTensor.__new__", - "s2t.Path.__new__": "s2t.ChildNodeTensor.__new__", - "s2t.Prensor.__eq__": "s2t.ChildNodeTensor.__eq__", - "s2t.Prensor.__ge__": "s2t.ChildNodeTensor.__ge__", - "s2t.Prensor.__gt__": "s2t.ChildNodeTensor.__gt__", - "s2t.Prensor.__le__": "s2t.ChildNodeTensor.__le__", - "s2t.Prensor.__lt__": "s2t.ChildNodeTensor.__lt__", - "s2t.Prensor.__ne__": "s2t.ChildNodeTensor.__ne__", - "s2t.Prensor.__new__": "s2t.ChildNodeTensor.__new__", - "s2t.RootNodeTensor.__eq__": "s2t.ChildNodeTensor.__eq__", - "s2t.RootNodeTensor.__ge__": "s2t.ChildNodeTensor.__ge__", - "s2t.RootNodeTensor.__gt__": "s2t.ChildNodeTensor.__gt__", - "s2t.RootNodeTensor.__le__": "s2t.ChildNodeTensor.__le__", - "s2t.RootNodeTensor.__lt__": "s2t.ChildNodeTensor.__lt__", - "s2t.RootNodeTensor.__ne__": "s2t.ChildNodeTensor.__ne__", - "s2t.RootNodeTensor.__new__": "s2t.ChildNodeTensor.__new__" - }, - "is_fragment": { - "s2t": false, - "s2t.ChildNodeTensor": false, - "s2t.ChildNodeTensor.__eq__": true, - "s2t.ChildNodeTensor.__ge__": true, - "s2t.ChildNodeTensor.__gt__": true, - "s2t.ChildNodeTensor.__init__": true, - "s2t.ChildNodeTensor.__le__": true, - "s2t.ChildNodeTensor.__lt__": true, - "s2t.ChildNodeTensor.__ne__": true, - "s2t.ChildNodeTensor.__new__": true, - "s2t.ChildNodeTensor.get_positional_index": true, - "s2t.ChildNodeTensor.is_repeated": true, - "s2t.ChildNodeTensor.parent_index": true, - "s2t.ChildNodeTensor.size": true, - "s2t.Expression": false, - "s2t.Expression.__eq__": true, - "s2t.Expression.__ge__": true, - "s2t.Expression.__gt__": true, - "s2t.Expression.__init__": true, - "s2t.Expression.__le__": true, - "s2t.Expression.__lt__": true, - "s2t.Expression.__ne__": true, - "s2t.Expression.__new__": true, - "s2t.Expression.apply": true, - "s2t.Expression.apply_schema": true, - "s2t.Expression.broadcast": true, - "s2t.Expression.calculate": true, - "s2t.Expression.calculation_equal": true, - "s2t.Expression.calculation_is_identity": true, - "s2t.Expression.cogroup_by_index": true, - "s2t.Expression.create_has_field": true, - "s2t.Expression.create_proto_index": true, - "s2t.Expression.create_size_field": true, - "s2t.Expression.get_child": true, - "s2t.Expression.get_child_or_error": true, - "s2t.Expression.get_descendant": true, - "s2t.Expression.get_descendant_or_error": true, - "s2t.Expression.get_known_children": true, - "s2t.Expression.get_known_descendants": true, - "s2t.Expression.get_paths_with_schema": true, - 
"s2t.Expression.get_schema": true, - "s2t.Expression.get_source_expressions": true, - "s2t.Expression.is_leaf": true, - "s2t.Expression.is_repeated": true, - "s2t.Expression.known_field_names": true, - "s2t.Expression.map_field_values": true, - "s2t.Expression.map_ragged_tensors": true, - "s2t.Expression.map_sparse_tensors": true, - "s2t.Expression.project": true, - "s2t.Expression.promote": true, - "s2t.Expression.promote_and_broadcast": true, - "s2t.Expression.reroot": true, - "s2t.Expression.schema_feature": true, - "s2t.Expression.schema_string": true, - "s2t.Expression.slice": true, - "s2t.Expression.truncate": true, - "s2t.Expression.type": true, - "s2t.LeafNodeTensor": false, - "s2t.LeafNodeTensor.__eq__": true, - "s2t.LeafNodeTensor.__ge__": true, - "s2t.LeafNodeTensor.__gt__": true, - "s2t.LeafNodeTensor.__init__": true, - "s2t.LeafNodeTensor.__le__": true, - "s2t.LeafNodeTensor.__lt__": true, - "s2t.LeafNodeTensor.__ne__": true, - "s2t.LeafNodeTensor.__new__": true, - "s2t.LeafNodeTensor.get_positional_index": true, - "s2t.LeafNodeTensor.is_repeated": true, - "s2t.LeafNodeTensor.parent_index": true, - "s2t.LeafNodeTensor.values": true, - "s2t.NodeTensor": false, - "s2t.Path": false, - "s2t.Path.__add__": true, - "s2t.Path.__eq__": true, - "s2t.Path.__ge__": true, - "s2t.Path.__gt__": true, - "s2t.Path.__init__": true, - "s2t.Path.__le__": true, - "s2t.Path.__len__": true, - "s2t.Path.__lt__": true, - "s2t.Path.__ne__": true, - "s2t.Path.__new__": true, - "s2t.Path.as_proto": true, - "s2t.Path.concat": true, - "s2t.Path.get_child": true, - "s2t.Path.get_least_common_ancestor": true, - "s2t.Path.get_parent": true, - "s2t.Path.is_ancestor": true, - "s2t.Path.prefix": true, - "s2t.Path.suffix": true, - "s2t.Prensor": false, - "s2t.Prensor.__eq__": true, - "s2t.Prensor.__ge__": true, - "s2t.Prensor.__gt__": true, - "s2t.Prensor.__init__": true, - "s2t.Prensor.__le__": true, - "s2t.Prensor.__lt__": true, - "s2t.Prensor.__ne__": true, - "s2t.Prensor.__new__": true, - "s2t.Prensor.field_names": true, - "s2t.Prensor.get_child": true, - "s2t.Prensor.get_child_or_error": true, - "s2t.Prensor.get_children": true, - "s2t.Prensor.get_descendant": true, - "s2t.Prensor.get_descendant_or_error": true, - "s2t.Prensor.get_descendants": true, - "s2t.Prensor.get_ragged_tensor": true, - "s2t.Prensor.get_ragged_tensors": true, - "s2t.Prensor.get_sparse_tensor": true, - "s2t.Prensor.get_sparse_tensors": true, - "s2t.Prensor.is_leaf": true, - "s2t.Prensor.node": true, - "s2t.RootNodeTensor": false, - "s2t.RootNodeTensor.__eq__": true, - "s2t.RootNodeTensor.__ge__": true, - "s2t.RootNodeTensor.__gt__": true, - "s2t.RootNodeTensor.__init__": true, - "s2t.RootNodeTensor.__le__": true, - "s2t.RootNodeTensor.__lt__": true, - "s2t.RootNodeTensor.__ne__": true, - "s2t.RootNodeTensor.__new__": true, - "s2t.RootNodeTensor.get_positional_index": true, - "s2t.RootNodeTensor.is_repeated": true, - "s2t.RootNodeTensor.size": true, - "s2t.Step": false, - "s2t.calculate_prensors": false, - "s2t.calculate_prensors_with_graph": false, - "s2t.calculate_prensors_with_source_paths": false, - "s2t.create_expression_from_file_descriptor_set": false, - "s2t.create_expression_from_prensor": false, - "s2t.create_expression_from_proto": false, - "s2t.create_path": false, - "s2t.create_prensor_from_descendant_nodes": false, - "s2t.create_prensor_from_root_and_children": false, - "s2t.get_default_options": false, - "s2t.get_options_with_minimal_checks": false, - "s2t.get_ragged_tensor": false, - "s2t.get_ragged_tensors": false, - 
"s2t.get_sparse_tensor": false, - "s2t.get_sparse_tensors": false - }, - "py_module_names": [ - "s2t" - ], - "site_link": null -} diff --git a/g3doc/api_docs/python/s2t/_toc.yaml b/g3doc/api_docs/python/s2t/_toc.yaml deleted file mode 100644 index 04c05ce..0000000 --- a/g3doc/api_docs/python/s2t/_toc.yaml +++ /dev/null @@ -1,55 +0,0 @@ -toc: -- title: s2t - section: - - title: Overview - path: /api_docs/python/s2t - - title: ChildNodeTensor - path: /api_docs/python/s2t/ChildNodeTensor - - title: Expression - path: /api_docs/python/s2t/Expression - - title: LeafNodeTensor - path: /api_docs/python/s2t/LeafNodeTensor - - title: NodeTensor - path: /api_docs/python/s2t/NodeTensor - - title: Path - path: /api_docs/python/s2t/Path - - title: Prensor - path: /api_docs/python/s2t/Prensor - - title: RootNodeTensor - path: /api_docs/python/s2t/RootNodeTensor - - title: Step - path: /api_docs/python/s2t/Step - - title: calculate_prensors - path: /api_docs/python/s2t/calculate_prensors - - title: calculate_prensors_with_graph - path: /api_docs/python/s2t/calculate_prensors_with_graph - - title: calculate_prensors_with_source_paths - path: /api_docs/python/s2t/calculate_prensors_with_source_paths - - title: create_expression_from_file_descriptor_set - path: /api_docs/python/s2t/create_expression_from_file_descriptor_set - - title: create_expression_from_prensor - path: /api_docs/python/s2t/create_expression_from_prensor - - title: create_expression_from_proto - path: /api_docs/python/s2t/create_expression_from_proto - - title: create_path - path: /api_docs/python/s2t/create_path - - title: create_prensor_from_descendant_nodes - path: /api_docs/python/s2t/create_prensor_from_descendant_nodes - - title: create_prensor_from_root_and_children - path: /api_docs/python/s2t/create_prensor_from_root_and_children - - title: get_default_options - path: /api_docs/python/s2t/get_default_options - - title: get_options_with_minimal_checks - path: /api_docs/python/s2t/get_options_with_minimal_checks - - title: get_ragged_tensor - status: deprecated - path: /api_docs/python/s2t/get_ragged_tensor - - title: get_ragged_tensors - status: deprecated - path: /api_docs/python/s2t/get_ragged_tensors - - title: get_sparse_tensor - status: deprecated - path: /api_docs/python/s2t/get_sparse_tensor - - title: get_sparse_tensors - status: deprecated - path: /api_docs/python/s2t/get_sparse_tensors diff --git a/g3doc/api_docs/python/s2t/all_symbols.md b/g3doc/api_docs/python/s2t/all_symbols.md deleted file mode 100644 index bd052bb..0000000 --- a/g3doc/api_docs/python/s2t/all_symbols.md +++ /dev/null @@ -1,29 +0,0 @@ -# All symbols in Struct2Tensor - - - -## Primary symbols -* s2t -* s2t.ChildNodeTensor -* s2t.Expression -* s2t.LeafNodeTensor -* s2t.NodeTensor -* s2t.Path -* s2t.Prensor -* s2t.RootNodeTensor -* s2t.Step -* s2t.calculate_prensors -* s2t.calculate_prensors_with_graph -* s2t.calculate_prensors_with_source_paths -* s2t.create_expression_from_file_descriptor_set -* s2t.create_expression_from_prensor -* s2t.create_expression_from_proto -* s2t.create_path -* s2t.create_prensor_from_descendant_nodes -* s2t.create_prensor_from_root_and_children -* s2t.get_default_options -* s2t.get_options_with_minimal_checks -* s2t.get_ragged_tensor -* s2t.get_ragged_tensors -* s2t.get_sparse_tensor -* s2t.get_sparse_tensors \ No newline at end of file diff --git a/g3doc/api_docs/python/s2t/calculate_prensors.md b/g3doc/api_docs/python/s2t/calculate_prensors.md deleted file mode 100644 index ee30dea..0000000 --- 
a/g3doc/api_docs/python/s2t/calculate_prensors.md +++ /dev/null @@ -1,81 +0,0 @@ -description: Gets the prensor value of the expressions. - -
- - -
- -# s2t.calculate_prensors - - - - - - - - - -Gets the prensor value of the expressions. - - - - - - - - - - - - - - - - - - - - - - - -
-`expressions` - -expressions to calculate prensors for. -
-`options` - -options for calculate(...). -
-`feed_dict` - -a dictionary, mapping expression to prensor that will be used -as the initial expression in the expression graph. -
- - - - - - - - - - - -
-a list of prensors. -
- diff --git a/g3doc/api_docs/python/s2t/calculate_prensors_with_graph.md b/g3doc/api_docs/python/s2t/calculate_prensors_with_graph.md deleted file mode 100644 index e1a05b3..0000000 --- a/g3doc/api_docs/python/s2t/calculate_prensors_with_graph.md +++ /dev/null @@ -1,83 +0,0 @@ -description: Gets the prensor value of the expressions and the graph used. - -
- - -
- -# s2t.calculate_prensors_with_graph - - - - - - - - - -Gets the prensor value of the expressions and the graph used. - - - - - - - -This method is useful for getting information like the protobuf fields parsed -to create an expression. - - - - - - - - - - - - - - - - -
-`expressions` - -expressions to calculate prensors for. -
-`options` - -options for calculate(...) methods. -
-`feed_dict` - -a dictionary, mapping expression to prensor that will be used -as the initial expression in the expression graph. -
- - - - - - - - - - - -
-a list of prensors, and the graph used to calculate them. -
- diff --git a/g3doc/api_docs/python/s2t/calculate_prensors_with_source_paths.md b/g3doc/api_docs/python/s2t/calculate_prensors_with_source_paths.md deleted file mode 100644 index 068d369..0000000 --- a/g3doc/api_docs/python/s2t/calculate_prensors_with_source_paths.md +++ /dev/null @@ -1,34 +0,0 @@ -description: Returns a list of prensor trees, and proto summaries. - -
- - -
- -# s2t.calculate_prensors_with_source_paths - - - - - - - - - -Returns a list of prensor trees, and proto summaries. - - - - - - diff --git a/g3doc/api_docs/python/s2t/create_expression_from_file_descriptor_set.md b/g3doc/api_docs/python/s2t/create_expression_from_file_descriptor_set.md deleted file mode 100644 index 77c80eb..0000000 --- a/g3doc/api_docs/python/s2t/create_expression_from_file_descriptor_set.md +++ /dev/null @@ -1,93 +0,0 @@ -description: Create an expression from a 1D tensor of serialized protos. - -
- - -
- -# s2t.create_expression_from_file_descriptor_set - - - - - - - - - -Create an expression from a 1D tensor of serialized protos. - - - - - - - - - - - - - - - - - - - - - - - - - - -
-`tensor_of_protos` - -1D tensor of serialized protos. -
-`proto_name` - -fully qualified name (e.g. "some.package.SomeProto") of the -proto in `tensor_of_protos`. -
-`file_descriptor_set` - -The FileDescriptorSet proto containing `proto_name`'s -and all its dependencies' FileDescriptorProto. Note that if file1 imports -file2, then file2's FileDescriptorProto must precede file1's in -file_descriptor_set.file. -
-`message_format` - -Indicates the format of the protocol buffer: is one of -'text' or 'binary'. -
- - - - - - - - - - - -
-An expression. -
- diff --git a/g3doc/api_docs/python/s2t/create_expression_from_prensor.md b/g3doc/api_docs/python/s2t/create_expression_from_prensor.md deleted file mode 100644 index f72995e..0000000 --- a/g3doc/api_docs/python/s2t/create_expression_from_prensor.md +++ /dev/null @@ -1,64 +0,0 @@ -description: Gets an expression representing the prensor. - -
- - -
- -# s2t.create_expression_from_prensor - - - - - - - - - -Gets an expression representing the prensor. - - - - - - - - - - - - - - - - - -
-`t` - -The prensor to represent. -
- - - - - - - - - - - -
-An expression representing the prensor. -
- diff --git a/g3doc/api_docs/python/s2t/create_expression_from_proto.md b/g3doc/api_docs/python/s2t/create_expression_from_proto.md deleted file mode 100644 index 4a720e8..0000000 --- a/g3doc/api_docs/python/s2t/create_expression_from_proto.md +++ /dev/null @@ -1,81 +0,0 @@ -description: Create an expression from a 1D tensor of serialized protos. - -
- - -
- -# s2t.create_expression_from_proto - - - - - - - - - -Create an expression from a 1D tensor of serialized protos. - - - - - - - - - - - - - - - - - - - - - - - -
-`tensor_of_protos` - -1D tensor of serialized protos. -
-`desc` - -a descriptor of protos in tensor of protos. -
-`message_format` - -Indicates the format of the protocol buffer: is one of -'text' or 'binary'. -
- - - - - - - - - - - -
-An expression. -
- diff --git a/g3doc/api_docs/python/s2t/create_path.md b/g3doc/api_docs/python/s2t/create_path.md deleted file mode 100644 index 65ba841..0000000 --- a/g3doc/api_docs/python/s2t/create_path.md +++ /dev/null @@ -1,94 +0,0 @@ -description: Create a path from an object. - -
- - -
-
-# s2t.create_path
-
-Create a path from an object.
-
-#### The BNF for a path is:
-
-letter := [A-Za-z]
-digit := [0-9]
-<step_char> := "_" | "-" | letter | digit
-<simple_step> := <step_char>+
-<extension> := "(" <simple_step> ("." <simple_step>)* ")"
-<step> := <simple_step> | <extension>
-<path> := (<step> ("." <step>)*)?
-
-`path_source` - -a string or a Path object. -
- - - - - - - - - - - -
-A Path. -
- - - - - - - - - - - - -
-`ValueError` - -if this is not a valid path. -
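> Editor's note: two sketches that exercise the grammar above; the extension name is hypothetical.

```python
import struct2tensor as s2t

s2t.create_path("event.action.doc_id")        # plain dotted steps
s2t.create_path("(my.package.MyExtension)")   # a parenthesized extension step
```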
- diff --git a/g3doc/api_docs/python/s2t/create_prensor_from_descendant_nodes.md b/g3doc/api_docs/python/s2t/create_prensor_from_descendant_nodes.md deleted file mode 100644 index 4222a52..0000000 --- a/g3doc/api_docs/python/s2t/create_prensor_from_descendant_nodes.md +++ /dev/null @@ -1,82 +0,0 @@ -description: Create a prensor from a map of paths to NodeTensor. - -
- - -
- -# s2t.create_prensor_from_descendant_nodes - - - - - - - - - -Create a prensor from a map of paths to NodeTensor. - - - - - - - -If a path is a key in the map, all prefixes of that path must be present. - - - - - - - - - - -
-`nodes` - -A map from paths to NodeTensors. -
- - - - - - - - - - - -
-A Prensor. -
- - - - - - - - - - - - -
-`ValueError` - -if there is a prefix of a path missing. -
- diff --git a/g3doc/api_docs/python/s2t/create_prensor_from_root_and_children.md b/g3doc/api_docs/python/s2t/create_prensor_from_root_and_children.md deleted file mode 100644 index faabc20..0000000 --- a/g3doc/api_docs/python/s2t/create_prensor_from_root_and_children.md +++ /dev/null @@ -1,32 +0,0 @@ -
- - -
- -# s2t.create_prensor_from_root_and_children - - - - - - - - - - - - - - - - diff --git a/g3doc/api_docs/python/s2t/get_default_options.md b/g3doc/api_docs/python/s2t/get_default_options.md deleted file mode 100644 index 9eee444..0000000 --- a/g3doc/api_docs/python/s2t/get_default_options.md +++ /dev/null @@ -1,31 +0,0 @@ -description: Get the default options. - -
- - -
- -# s2t.get_default_options - - - - - - - - - -Get the default options. - - - - - - diff --git a/g3doc/api_docs/python/s2t/get_options_with_minimal_checks.md b/g3doc/api_docs/python/s2t/get_options_with_minimal_checks.md deleted file mode 100644 index 644c0cc..0000000 --- a/g3doc/api_docs/python/s2t/get_options_with_minimal_checks.md +++ /dev/null @@ -1,31 +0,0 @@ -description: Options for calculation with minimal runtime checks. - -
- - -
- -# s2t.get_options_with_minimal_checks - - - - - - - - - -Options for calculation with minimal runtime checks. - - - - - - diff --git a/g3doc/api_docs/python/s2t/get_ragged_tensor.md b/g3doc/api_docs/python/s2t/get_ragged_tensor.md deleted file mode 100644 index 04eaeaf..0000000 --- a/g3doc/api_docs/python/s2t/get_ragged_tensor.md +++ /dev/null @@ -1,86 +0,0 @@ -description: Get a ragged tensor for a path. (deprecated) - -
- - -
- -# s2t.get_ragged_tensor - - - - - - - - - -Get a ragged tensor for a path. (deprecated) - - - - - - - -Warning: THIS FUNCTION IS DEPRECATED. It will be removed in a future version. -Instructions for updating: -Use the Prensor class method instead. - -All steps are represented in the ragged tensor. - - - - - - - - - - - - - - - - -
-`t` - -The Prensor to extract tensors from. -
-`p` - -the path to a leaf node in `t`. -
-`options` - -used to pass options for calculating ragged tensors. -
- - - - - - - - - - - -
-A ragged tensor containing values of the leaf node, preserving the -structure along the path. Raises an error if the path is not found. -
- diff --git a/g3doc/api_docs/python/s2t/get_ragged_tensors.md b/g3doc/api_docs/python/s2t/get_ragged_tensors.md deleted file mode 100644 index 3bf2171..0000000 --- a/g3doc/api_docs/python/s2t/get_ragged_tensors.md +++ /dev/null @@ -1,75 +0,0 @@ -description: Gets ragged tensors for all the leaves of the prensor expression. (deprecated) - -
- - -
- -# s2t.get_ragged_tensors - - - - - - - - - -Gets ragged tensors for all the leaves of the prensor expression. (deprecated) - - - - - - - -Warning: THIS FUNCTION IS DEPRECATED. It will be removed in a future version. -Instructions for updating: -Use the Prensor class method instead. - - - - - - - - - - - - - -
-`t` - -The Prensor to extract tensors from. -
-`options` - -used to pass options for calculating ragged tensors. -
- - - - - - - - - - - -
-A map from paths to ragged tensors. -
- diff --git a/g3doc/api_docs/python/s2t/get_sparse_tensor.md b/g3doc/api_docs/python/s2t/get_sparse_tensor.md deleted file mode 100644 index b6ec8ab..0000000 --- a/g3doc/api_docs/python/s2t/get_sparse_tensor.md +++ /dev/null @@ -1,87 +0,0 @@ -description: Gets a sparse tensor for path p. (deprecated) - -
- - -
- -# s2t.get_sparse_tensor - - - - - - - - - -Gets a sparse tensor for path p. (deprecated) - - - - - - - -Warning: THIS FUNCTION IS DEPRECATED. It will be removed in a future version. -Instructions for updating: -Use the Prensor class method instead. - -Note that any optional fields are not registered as dimensions, as they can't -be represented in a sparse tensor. - - - - - - - - - - - - - - - - -
-`t` - -The Prensor to extract tensors from. -
-`p` - -The path to a leaf node in `t`. -
-`options` - -Currently unused. -
- - - - - - - - - - - -
-A sparse tensor containing values of the leaf node, preserving the -structure along the path. Raises an error if the path is not found. -
- diff --git a/g3doc/api_docs/python/s2t/get_sparse_tensors.md b/g3doc/api_docs/python/s2t/get_sparse_tensors.md deleted file mode 100644 index 4c50fb7..0000000 --- a/g3doc/api_docs/python/s2t/get_sparse_tensors.md +++ /dev/null @@ -1,75 +0,0 @@ -description: Gets sparse tensors for all the leaves of the prensor expression. (deprecated) - -
- - -
- -# s2t.get_sparse_tensors - - - - - - - - - -Gets sparse tensors for all the leaves of the prensor expression. (deprecated) - - - - - - - -Warning: THIS FUNCTION IS DEPRECATED. It will be removed in a future version. -Instructions for updating: -Use the Prensor class method instead. - - - - - - - - - - - - - -
-`t` - -The Prensor to extract tensors from. -
-`options` - -Currently unused. -
- - - - - - - - - - - -
-A map from paths to sparse tensors. -
-
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 0000000..67a6e4f
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,129 @@
+site_name: Struct2Tensor
+repo_name: "Struct2Tensor"
+repo_url: https://github.com/google/struct2tensor
+
+theme:
+  name: material
+  palette:
+    # Palette toggle for automatic mode
+    - media: "(prefers-color-scheme)"
+      primary: custom
+      accent: custom
+      toggle:
+        icon: material/brightness-auto
+        name: Switch to light mode
+
+    # Palette toggle for light mode
+    - media: "(prefers-color-scheme: light)"
+      primary: custom
+      accent: custom
+      scheme: default
+      toggle:
+        icon: material/brightness-7
+        name: Switch to dark mode
+
+    # Palette toggle for dark mode
+    - media: "(prefers-color-scheme: dark)"
+      primary: custom
+      accent: custom
+      scheme: slate
+      toggle:
+        icon: material/brightness-4
+        name: Switch to system preference
+  favicon: assets/favicon.png
+
+  features:
+    - content.code.copy
+    - content.code.select
+    - content.action.edit
+plugins:
+  - search
+  - autorefs
+  - mkdocstrings:
+      default_handler: python
+      handlers:
+        python:
+          options:
+            show_source: true
+            show_root_heading: true
+            unwrap_annotated: true
+            show_symbol_type_toc: true
+            show_symbol_type_heading: true
+            merge_init_into_class: true
+            show_signature_annotations: true
+            separate_signature: true
+            signature_crossrefs: true
+            group_by_category: true
+            show_category_heading: true
+            inherited_members: true
+            show_submodules: true
+            show_object_full_path: false
+            show_root_full_path: true
+            docstring_section_style: "spacy"
+            show_if_no_docstring: true
+            summary: true
+            filters:
+              - "!^_"
+              - "^__init__$"
+              - "^__call__$"
+              - "!^logger"
+              - "!_test$"
+              - "!_test_util$"
+            extensions:
+              - griffe_inherited_docstrings
+          import:
+            - https://docs.python.org/3/objects.inv
+  - mkdocs-jupyter:
+      execute: false
+  - caption:
+      figure:
+        ignore_alt: true
+
+markdown_extensions:
+  - admonition
+  - attr_list
+  - def_list
+  - tables
+  - toc:
+      permalink: true
+  - pymdownx.highlight:
+      anchor_linenums: true
+      linenums: false
+      line_spans: __span
+      pygments_lang_class: true
+  - pymdownx.inlinehilite
+  - pymdownx.snippets
+  - pymdownx.superfences
+  - pymdownx.arithmatex:
+      generic: true
+  - pymdownx.critic
+  - pymdownx.caret
+  - pymdownx.keys
+  - pymdownx.mark
+  - pymdownx.tilde
+  - markdown_grid_tables
+  - md_in_html
+  - pymdownx.emoji:
+      emoji_index: !!python/name:material.extensions.emoji.twemoji
+      emoji_generator: !!python/name:material.extensions.emoji.to_svg
+
+extra_css:
+  - stylesheets/extra.css
+
+extra_javascript:
+  - javascripts/mathjax.js
+  - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js
+
+watch:
+  - struct2tensor
+nav:
+  - Examples:
+      - "Your structured data into Tensorflow": examples/prensor_playground
+
+  - API Docs:
+      - "s2t":
+          "Overview": api_docs/python/s2t
+          "s2t": api_docs/python/s2t/s2t
+      - "expression_impl":
+          "Overview": api_docs/python/expression_impl
+          "expression_impl": api_docs/python/expression_impl/expression_impl
diff --git a/requirements-docs.txt b/requirements-docs.txt
new file mode 100644
index 0000000..bf02e12
--- /dev/null
+++ b/requirements-docs.txt
@@ -0,0 +1,9 @@
+mkdocs
+mkdocs-material
+mkdocstrings[python]
+griffe-inherited-docstrings
+mkdocs-autorefs
+mkdocs-jupyter
+mkdocs-caption
+markdown-grid-tables
+black
diff --git a/setup.py b/setup.py
index f072d74..4774fc7 100644
--- a/setup.py
+++ b/setup.py
@@ -64,6 +64,11 @@ def select_constraint(default, nightly=None, git_master=None):
 exec(fp.read(), globals_dict)  # pylint: disable=exec-used
 __version__ = globals_dict['__version__']
globals_dict['__version__'] +# Get documentation build requirements +with open("requirements-docs.txt", "r") as fp: + docs_reqs = fp.readlines() +docs_reqs = [req.replace("\n", "") for req in docs_reqs] + setup( name='struct2tensor', version=__version__, @@ -90,6 +95,7 @@ def select_constraint(default, nightly=None, git_master=None): ), 'pyarrow>=10,<11', ], + extras_require={"docs": docs_reqs}, # Add in any packaged data. include_package_data=True, package_data={'': ['*.lib', '*.so']}, diff --git a/struct2tensor/__init__.py b/struct2tensor/__init__.py index dbf5aac..80db160 100644 --- a/struct2tensor/__init__.py +++ b/struct2tensor/__init__.py @@ -18,14 +18,18 @@ from struct2tensor.calculate import calculate_prensors_with_graph from struct2tensor.calculate_options import get_default_options from struct2tensor.calculate_options import get_options_with_minimal_checks -from struct2tensor.calculate_with_source_paths import calculate_prensors_with_source_paths +from struct2tensor.calculate_with_source_paths import ( + calculate_prensors_with_source_paths, +) # Import expressions API. from struct2tensor.create_expression import create_expression_from_prensor from struct2tensor.expression import Expression # Import expression queries API -from struct2tensor.expression_impl.proto import create_expression_from_file_descriptor_set +from struct2tensor.expression_impl.proto import ( + create_expression_from_file_descriptor_set, +) from struct2tensor.expression_impl.proto import create_expression_from_proto # Import path API @@ -52,3 +56,30 @@ # tf.compat.v1.Session.run() will be able to take a Prensor and return a # PrensorValue. import struct2tensor.prensor_value + +__all__ = [ + "s2t", + "calculate_prensors", + "calculate_prensors_with_graph", + "calculate_prensors_with_source_paths", + "ChildNodeTensor", + "create_expression_from_file_descriptor_set", + "create_expression_from_prensor", + "create_expression_from_proto", + "create_path", + "create_prensor_from_descendant_nodes", + "create_prensor_from_root_and_children", + "Expression", + "get_default_options", + "get_options_with_minimal_checks", + "get_ragged_tensor", + "get_ragged_tensors", + "get_sparse_tensor", + "get_sparse_tensors", + "LeafNodeTensor", + "NodeTensor", + "Path", + "Prensor", + "RootNodeTensor", + "Step", +] diff --git a/struct2tensor/expression.py b/struct2tensor/expression.py index c3b7665..33987bc 100644 --- a/struct2tensor/expression.py +++ b/struct2tensor/expression.py @@ -300,7 +300,7 @@ def get_known_descendants(self) -> Mapping[path.Path, "Expression"]: Returns: A mapping from paths (relative to the root of the subexpression) to - expressions. + expressions. """ known_subexpressions = { k: v.get_known_descendants() @@ -478,7 +478,8 @@ def map_field_values(self, source_path: CoercableToPath, new_field_name: path.Step) -> "Expression": """Map a primitive field to create a new primitive field. - Note: the dtype argument is added since the v1 API. + !!! Note + The dtype argument has been added since the v1 API. Args: source_path: the origin path. @@ -601,13 +602,14 @@ def get_schema(self, create_schema_features=True) -> schema_pb2.Schema: def schema_string(self, limit: Optional[int] = None) -> str: """Returns a schema for the expression. - E.g. - + For example, + ``` repeated root: optional int32 foo optional bar: optional string baz optional int64 bak + ``` Note that unknown fields and subexpressions are not displayed. 
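To make the `map_field_values` and `schema_string` hunks above concrete, here is a minimal sketch; the expression `expr`, the leaf path `"foo.bar"`, and the new field name are hypothetical, and the argument order follows the Args documented in the docstring:

```python
import tensorflow as tf

# Hypothetical: `expr` is an existing Expression with an int64 leaf at "foo.bar".
# map_field_values applies an element-wise operation to a primitive leaf and
# stores the result under a new sibling field; note the dtype argument that,
# per the note above, has been added since the v1 API.
new_expr = expr.map_field_values(
    "foo.bar",                      # source_path: the origin path
    lambda values: values * 2,      # operation applied to the leaf values
    dtype=tf.int64,                 # dtype of the new leaf
    new_field_name="bar_doubled")   # name of the created field

# Render the resulting schema in the format shown above.
print(new_expr.schema_string(limit=10))
```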
diff --git a/struct2tensor/expression_impl/__init__.py b/struct2tensor/expression_impl/__init__.py index acd29ae..df7582a 100644 --- a/struct2tensor/expression_impl/__init__.py +++ b/struct2tensor/expression_impl/__init__.py @@ -16,7 +16,7 @@ The modules in this file should be accessed like the following: -``` +```python import struct2tensor as s2t from struct2tensor import expression_impl @@ -41,3 +41,68 @@ from struct2tensor.expression_impl import reroot from struct2tensor.expression_impl import size from struct2tensor.expression_impl import slice_expression + + +__all__ = [ + "apply_schema", + "apply_schema.apply_schema", + "broadcast", + "broadcast.broadcast", + "broadcast.broadcast_anonymous", + "depth_limit", + "depth_limit.limit_depth", + "filter_expression", + "filter_expression.filter_by_child", + "filter_expression.filter_by_sibling", + "index", + "index.get_index_from_end", + "index.get_positional_index", + "map_prensor", + "map_prensor.map_ragged_tensor", + "map_prensor.map_sparse_tensor", + "map_prensor_to_prensor", + "map_prensor_to_prensor.create_schema", + "map_prensor_to_prensor.map_prensor_to_prensor", + "map_prensor_to_prensor.Schema", + "map_values", + "map_values.map_many_values", + "map_values.map_values", + "map_values.map_values_anonymous", + "parquet", + "parquet.calculate_parquet_values", + "parquet.create_expression_from_parquet_file", + "parquet.ParquetDataset", + "placeholder", + "placeholder.create_expression_from_schema", + "placeholder.get_placeholder_paths_from_graph", + "project", + "project.project", + "promote", + "promote_and_broadcast", + "promote_and_broadcast.promote_and_broadcast", + "promote_and_broadcast.promote_and_broadcast_anonymous", + "promote.promote", + "promote.promote_anonymous", + "promote.PromoteChildExpression", + "promote.PromoteExpression", + "proto", + "proto.create_expression_from_file_descriptor_set", + "proto.create_expression_from_proto", + "proto.create_transformed_field", + "proto.DescriptorPool", + "proto.FileDescriptorSet", + "proto.is_proto_expression", + "proto.ProtoExpression", + "proto.TransformFn", + "reroot", + "reroot.create_proto_index_field", + "reroot.reroot", + "size", + "size.has", + "size.size", + "size.size_anonymous", + "size.SizeExpression", + "slice_expression", + "slice_expression.IndexValue", + "slice_expression.slice_expression", +] diff --git a/struct2tensor/expression_impl/apply_schema.py b/struct2tensor/expression_impl/apply_schema.py index edcfb7c..0787ac5 100644 --- a/struct2tensor/expression_impl/apply_schema.py +++ b/struct2tensor/expression_impl/apply_schema.py @@ -32,13 +32,15 @@ This does not filter out fields not in the schema. - +```python my_expr = ... -my_schema = ...schema here... +my_schema = ...  # schema here my_new_schema = my_expr.apply_schema(my_schema).get_schema() -my_new_schema has semantically identical information on the fields as my_schema. +# my_new_schema has semantically identical information on the fields as my_schema. +``` TODO(martinz): Add utilities to: + 1. Get the (non-deprecated) paths from a schema. 2. Check if any paths in the schema are not in the expression. 3. Check if any paths in the expression are not in the schema. 
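Building on the `apply_schema` snippet above, a runnable sketch of the round trip; `expr` is a hypothetical existing expression, and the schema is a `tensorflow_metadata` `schema_pb2.Schema` (the type returned by `get_schema` in the `expression.py` hunk earlier):

```python
from tensorflow_metadata.proto.v0 import schema_pb2

# Hypothetical schema describing one field of `expr`; in practice the schema
# would usually come from a pipeline artifact rather than be built inline.
my_schema = schema_pb2.Schema()
feature = my_schema.feature.add()
feature.name = "foo"
feature.type = schema_pb2.INT

# Attach the schema to the expression; the schema read back from the new
# expression carries semantically identical information on the fields.
new_expr = expr.apply_schema(my_schema)
my_new_schema = new_expr.get_schema()
```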
diff --git a/struct2tensor/expression_impl/broadcast.py b/struct2tensor/expression_impl/broadcast.py index 2eb0873..78f2798 100644 --- a/struct2tensor/expression_impl/broadcast.py +++ b/struct2tensor/expression_impl/broadcast.py @@ -26,7 +26,9 @@ +-event* | +-val*-int64 +``` +```json session: { event: {} event: {} @@ -42,7 +44,7 @@ Then: -``` +```python broadcast.broadcast(expr, path.Path(["session","val"]), "event", "nv") ``` @@ -58,7 +60,9 @@ | +---nv*-int64 | +-val*-int64 +``` +```json session: { event: { nv: 10 diff --git a/struct2tensor/expression_impl/filter_expression.py b/struct2tensor/expression_impl/filter_expression.py index a791f05..5cca1d1 100644 --- a/struct2tensor/expression_impl/filter_expression.py +++ b/struct2tensor/expression_impl/filter_expression.py @@ -53,7 +53,7 @@ The following call will have the same effect as above: -``` +```python root_2 = filter_expression.filter_by_child( root, path.create_path("doc"), "keep_me", "new_doc") ``` diff --git a/struct2tensor/expression_impl/index.py b/struct2tensor/expression_impl/index.py index a18d457..63436f1 100644 --- a/struct2tensor/expression_impl/index.py +++ b/struct2tensor/expression_impl/index.py @@ -19,7 +19,7 @@ Given: -``` +```json session: { event: { val: 111 @@ -41,13 +41,13 @@ } ``` -``` +```python get_positional_index(expr, path.Path(["event","val"]), "val_index") ``` yields: -``` +```json session: { event: { val: 111 @@ -75,12 +75,12 @@ } ``` -``` +```python get_index_from_end(expr, path.Path(["event","val"]), "neg_val_index") ``` yields: -``` +```json session: { event: { val: 111 diff --git a/struct2tensor/expression_impl/map_prensor.py b/struct2tensor/expression_impl/map_prensor.py index cec43b1..2ed572e 100644 --- a/struct2tensor/expression_impl/map_prensor.py +++ b/struct2tensor/expression_impl/map_prensor.py @@ -18,7 +18,7 @@ Assume expr is: -``` +```json session: { event: { val_a: 10 @@ -45,7 +45,7 @@ map_sparse_tensor converts val_a and val_b to sparse tensors, and then add them to produce val_sum. -``` +```python new_root = map_prensor.map_sparse_tensor( expr, path.Path(["event"]), @@ -59,7 +59,7 @@ map_ragged_tensor converts val_a and val_b to ragged tensors, and then add them to produce val_sum. -``` +```python new_root = map_prensor.map_ragged_tensor( expr, path.Path(["event"]), @@ -72,7 +72,7 @@ The result of either is: -``` +```json session: { event: { val_a: 10 @@ -130,7 +130,7 @@ def map_sparse_tensor(root: expression.Expression, root_path: path.Path, Returns: A new root expression containing the old root expression plus the new path, - root_path.get_child(new_field_name), with the result of the operation. + root_path.get_child(new_field_name), with the result of the operation. """ return _map_sparse_tensor_impl(root, root_path, paths, operation, is_repeated, @@ -157,7 +157,7 @@ def map_ragged_tensor(root: expression.Expression, root_path: path.Path, Returns: A new root expression containing the old root expression plus the new path, - root_path.get_child(new_field_name), with the result of the operation. + root_path.get_child(new_field_name), with the result of the operation. """ return _map_ragged_tensor_impl(root, root_path, paths, operation, is_repeated, dtype, new_field_name)[0] @@ -353,8 +353,8 @@ def _map_ragged_tensor_impl(root: expression.Expression, root_path: path.Path, Returns: An expression/path pair (expr,p) with a new root expression containing - the old root expression plus the new path, - root_path.get_child(new_field_name), with the result of the operation. 
+        the old root expression plus the new path, +        root_path.get_child(new_field_name), with the result of the operation. """ def new_op(tree: prensor.Prensor, diff --git a/struct2tensor/expression_impl/map_prensor_to_prensor.py b/struct2tensor/expression_impl/map_prensor_to_prensor.py index ce3246b..d54ab0b 100644 --- a/struct2tensor/expression_impl/map_prensor_to_prensor.py +++ b/struct2tensor/expression_impl/map_prensor_to_prensor.py @@ -34,7 +34,7 @@ foo2 bar2 ``` -``` +```python my_result_schema = create_schema( is_repeated=True, children={"foo2":{is_repeated:True, dtype:tf.int64}, @@ -49,7 +49,9 @@ event / \ foo bar +``` +```python result = map_prensor_to_prensor( original, path.Path(["session","event"]), @@ -155,11 +157,13 @@ def create_schema(is_repeated: bool = True, children: Optional[Dict[path.Step, Any]] = None) -> Schema: """Create a schema recursively. - Example: - my_result_schema = create_schema( - is_repeated=True, - children={"foo2":{is_repeated=True, dtype=tf.int64}, - "bar2":{is_repeated=False, dtype=tf.int64}}) + !!! Example + ```python + my_result_schema = create_schema( + is_repeated=True, + children={"foo2": {"is_repeated": True, "dtype": tf.int64}, + "bar2": {"is_repeated": False, "dtype": tf.int64}}) + ``` Args: is_repeated: whether the root is repeated. @@ -211,37 +215,46 @@ def map_prensor_to_prensor( For example, suppose you have an op my_op, that takes a prensor of the form: + ``` event - / \ - foo bar + / \ + foo bar + ``` and produces a prensor of the form my_result_schema: - event - / \ - foo2 bar2 + ``` + event + / \ + foo2 bar2 + ``` If you give it an expression original with the schema: + ``` session | event / \ foo bar - + ``` + ```python result = map_prensor_to_prensor( original, path.Path(["session","event"]), my_op, my_output_schema) + ``` Result will have the schema: + ``` session | event-------- / \ \ \ foo bar foo2 bar2 + ``` Args: root_expr: the root expression diff --git a/struct2tensor/expression_impl/parquet.py b/struct2tensor/expression_impl/parquet.py index ff66324..07b6b1e 100644 --- a/struct2tensor/expression_impl/parquet.py +++ b/struct2tensor/expression_impl/parquet.py @@ -13,17 +13,16 @@ # limitations under the License. """Apache Parquet Dataset. -Example usage: +!!! Example "Example Usage" + ```python + exp = create_expression_from_parquet_file(filenames) + docid_project_exp = project.project(exp, [path.Path(["DocId"])]) + pqds = parquet_dataset.calculate_parquet_values([docid_project_exp], exp, + filenames, batch_size) -``` - exp = create_expression_from_parquet_file(filenames) - docid_project_exp = project.project(exp, [path.Path(["DocId"])]) - pqds = parquet_dataset.calculate_parquet_values([docid_project_exp], exp, - filenames, batch_size) - - for prensors in pqds: - doc_id_prensor = prensors[0] -``` + for prensors in pqds: + doc_id_prensor = prensors[0] + ``` """ @@ -52,7 +51,7 @@ def create_expression_from_parquet_file( Returns: A PlaceholderRootExpression that should be used as the root of an expression - graph. + graph. """ metadata = pq.ParquetFile(filenames[0]).metadata @@ -220,14 +219,18 @@ class ParquetDataset(_RawParquetDataset): The prensor will have a PrensorTypeSpec, which is created based on value_paths. - Note: In tensorflow v1 this dataset will not return a prensor. The output will - be the same format as _RawParquetDataset's output (a vector of tensors). - The following is a workaround in v1: - pq_ds = ParquetDataset(...)
- type_spec = pq_ds.element_spec - tensors = pq_ds.make_one_shot_iterator().get_next() - prensor = type_spec.from_components(tensors) - session.run(prensor) + !!! Note + In TensorFlow v1 this dataset will not return a prensor. The output will + be the same format as _RawParquetDataset's output (a vector of tensors). + The following is a workaround in v1: + + ```python + pq_ds = ParquetDataset(...) + type_spec = pq_ds.element_spec + tensors = pq_ds.make_one_shot_iterator().get_next() + prensor = type_spec.from_components(tensors) + session.run(prensor) + ``` """ def __init__(self, filenames: List[str], value_paths: List[str], diff --git a/struct2tensor/expression_impl/parse_message_level_ex.py b/struct2tensor/expression_impl/parse_message_level_ex.py index cba7895..2ef6aac 100644 --- a/struct2tensor/expression_impl/parse_message_level_ex.py +++ b/struct2tensor/expression_impl/parse_message_level_ex.py @@ -25,7 +25,7 @@ Specifically, consider google.protobuf.Any and proto maps: -``` +```proto package foo.bar; message MyMessage { @@ -53,7 +53,7 @@ Thus, we can run: -``` +```python my_message_serialized_tensor = ... my_message_parsed = parse_message_level_ex( diff --git a/struct2tensor/expression_impl/placeholder.py b/struct2tensor/expression_impl/placeholder.py index 782e9a7..ee0e1d9 100644 --- a/struct2tensor/expression_impl/placeholder.py +++ b/struct2tensor/expression_impl/placeholder.py @@ -22,7 +22,7 @@ Sample usage: -``` +```python placeholder_exp = placeholder.create_expression_from_schema(schema) new_exp = expression_queries(placeholder_exp, ..) result = calculate.calculate_values([new_exp], @@ -53,7 +53,7 @@ def create_expression_from_schema( Returns: A PlaceholderRootExpression that should be used as the root of an expression - graph. + graph. """ return _PlaceholderRootExpression(schema) diff --git a/struct2tensor/expression_impl/project.py b/struct2tensor/expression_impl/project.py index 1983698..de1cb2d 100644 --- a/struct2tensor/expression_impl/project.py +++ b/struct2tensor/expression_impl/project.py @@ -15,16 +15,15 @@ project is often used right before calculating the value. -Example: - -``` -expr = ... -new_expr = project.project(expr, [path.Path(["foo","bar"]), - path.Path(["x", "y"])]) -[prensor_result] = calculate.calculate_prensors([new_expr]) -``` - -prensor_result now has two paths, "foo.bar" and "x.y". +!!! Example + ```python + expr = ... + new_expr = project.project(expr, [path.Path(["foo","bar"]), + path.Path(["x", "y"])]) + [prensor_result] = calculate.calculate_prensors([new_expr]) + ``` + + prensor_result now has two paths, "foo.bar" and "x.y". """ diff --git a/struct2tensor/expression_impl/promote.py b/struct2tensor/expression_impl/promote.py index 3f14a19..b3ea8a7 100644 --- a/struct2tensor/expression_impl/promote.py +++ b/struct2tensor/expression_impl/promote.py @@ -27,7 +27,9 @@ +-event* | +-val*-int64 +``` +```json session: { event: { val: 111 @@ -50,7 +52,7 @@ ``` -``` +```python promote.promote(expr, path.Path(["session", "event", "val"]), nval) ``` @@ -66,7 +68,9 @@ | +-val*-int64 | +-nval*-int64 +``` +```json session: { event: { val: 111 diff --git a/struct2tensor/expression_impl/promote_and_broadcast.py b/struct2tensor/expression_impl/promote_and_broadcast.py index c916ccd..1874627 100644 --- a/struct2tensor/expression_impl/promote_and_broadcast.py +++ b/struct2tensor/expression_impl/promote_and_broadcast.py @@ -27,7 +27,9 @@ +-user_info? (question mark indicates optional) | +-age?
int64 +``` +```json session: { event: { val: 1 @@ -55,7 +57,7 @@ } ``` -``` +```python promote_and_broadcast.promote_and_broadcast( path.Path(["event"]),{"nage":path.Path(["user_info","age"])}) ``` @@ -76,7 +78,9 @@ +-user_info? (question mark indicates optional) | +-age? int64 +``` +```json session: { event: { nage: 25 @@ -159,7 +163,7 @@ def promote_and_broadcast(root: expression.Expression, Returns: A new expression, where all the origin paths are promoted and broadcast - until they are children of dest_path_parent. + until they are children of dest_path_parent. """ result_paths = {} diff --git a/struct2tensor/expression_impl/proto.py b/struct2tensor/expression_impl/proto.py index ce61c9d..f6f4b02 100644 --- a/struct2tensor/expression_impl/proto.py +++ b/struct2tensor/expression_impl/proto.py @@ -125,30 +125,32 @@ def transform_fn(parent_indices, values): return (transformed_parent_indices, transformed_values) Given: + - parent_indices: an int64 vector of non-decreasing parent message indices. - values: a string vector of serialized protos having the same shape as `parent_indices`. + `transform_fn` must return new parent indices and serialized values encoding the same proto message as the passed in `values`. These two vectors must have the same size, but it need not be the same as the input arguments. - Note: - If CalculateOptions.use_string_view (set at calculate time, thus this - Expression cannot know beforehand) is True, `values` passed to - `transform_fn` are string views pointing all the way back to the original - input tensor (of serialized root protos). And `transform_fn` must maintain - such views and avoid creating new values that are either not string views - into the root protos or self-owned strings. This is because downstream - decoding ops will still produce string views referring into its input - (which are string views into the root proto) and they will only hold a - reference to the original, root proto tensor, keeping it alive. So the input - tensor may get destroyed after the decoding op. + !!! Note + If CalculateOptions.use_string_view (set at calculate time, thus this + Expression cannot know beforehand) is True, `values` passed to + `transform_fn` are string views pointing all the way back to the original + input tensor (of serialized root protos). And `transform_fn` must maintain + such views and avoid creating new values that are either not string views + into the root protos or self-owned strings. This is because downstream + decoding ops will still produce string views referring into its input + (which are string views into the root proto) and they will only hold a + reference to the original, root proto tensor, keeping it alive. So the input + tensor may get destroyed after the decoding op. - In short, you can do element-wise transforms to `values`, but can't mutate - the contents of elements in `values` or create new elements. + In short, you can do element-wise transforms to `values`, but can't mutate + the contents of elements in `values` or create new elements. - To lift this restriction, a decoding op must be told to hold a reference - of the input tensors of all its upstream decoding ops. + To lift this restriction, a decoding op must be told to hold a reference + of the input tensors of all its upstream decoding ops. Args: @@ -233,6 +235,7 @@ class _ProtoChildNodeTensor(prensor.ChildNodeTensor): information needed by its children. In particular: + 1. Any needed regular fields are included. 2. Any needed extended fields are included. 3. 
Any needed map fields are included. @@ -365,11 +368,12 @@ class _ProtoChildExpression(_AbstractProtoChildExpression): """An expression representing a proto submessage. Supports: - A standard submessage. - An extension submessage. - A protobuf.Any submessage. - A proto map submessage. - Also supports having fields of the above types. + + - A standard submessage. + - An extension submessage. + - A protobuf.Any submessage. + - A proto map submessage. + - Fields of any of the above types. """ def __init__(self, parent: "_ParentProtoExpression", @@ -680,10 +684,10 @@ def _get_child( """Get a child expression. This will get one of the following: - A regular field. - An extension. - An Any filtered by value. - A map field. + - A regular field. + - An extension. + - An Any filtered by value. + - A map field. Args: parent: The parent expression. diff --git a/struct2tensor/expression_impl/size.py b/struct2tensor/expression_impl/size.py index aa430be..c087d48 100644 --- a/struct2tensor/expression_impl/size.py +++ b/struct2tensor/expression_impl/size.py @@ -15,14 +15,14 @@ Given a field "foo.bar", -``` +```python root = size(expr, path.Path(["foo","bar"]), "bar_size") ``` creates a new expression root that has an optional field "foo.bar_size", which is always present, and contains the number of bar in a particular foo. -``` +```python root_2 = has(expr, path.Path(["foo","bar"]), "bar_has") ``` diff --git a/struct2tensor/expression_impl/slice_expression.py b/struct2tensor/expression_impl/slice_expression.py index 92ed808..b022374 100644 --- a/struct2tensor/expression_impl/slice_expression.py +++ b/struct2tensor/expression_impl/slice_expression.py @@ -21,7 +21,7 @@ For example: -``` +```python >>> x = ['a', 'b', 'c', 'd', 'e', 'f', 'g'] >>> print(x[2:5]) # all elements between index 2 inclusive and index 5 exclusive ['c', 'd', 'e'] @@ -45,7 +45,7 @@ A prensor can be considered to be interleaved lists and dictionaries. E.g.: -``` +```python my_expression = [{ "foo":[ {"bar":[ @@ -62,7 +62,7 @@ }] ``` -``` +```python result_1 = slice_expression.slice_expression( my_expression, "foo.bar", "new_bar",begin=1, end=3) @@ -89,7 +89,7 @@ }] ``` -``` +```python result_2 = slice_expression.slice_expression( my_expression, "foo.bar.baz", "new_baz",begin=1, end=3) @@ -234,12 +234,14 @@ def _get_slice_mask( For example, given: an index with respect to its parent The range is specified with beginning and an end. + 1. If begin is not present, begin_index is implied to be zero. 2. If begin is negative, begin_index is the size of a particular list + begin 3. If end is not present, end_index is the length of the list + 1. 4. If end is negative, end_index is the length of the list + end 5. If end is non-negative, end_index is end. + The mask is positive for all elements in range(begin_index, end_index), and negative elsewhere. diff --git a/struct2tensor/path.py b/struct2tensor/path.py index 584e185..1a36e9c 100644 --- a/struct2tensor/path.py +++ b/struct2tensor/path.py @@ -317,6 +317,7 @@ def create_path(path_source: CoercableToPath) -> Path: """Create a path from an object. The BNF for a path is: + ``` letter := [A-Za-z] digit := [0-9] <simple_step_char> := "_" | "-" | letter | digit <simple_step> := <simple_step_char> | <simple_step_char> <simple_step> @@ -324,6 +325,7 @@ <extension> := "(" <simple_step> ("." <simple_step>)* ")" <step> := <simple_step> | <extension> <path> := ((<step> ".")* <step>)? + ``` TODO(martinz): consider removing dash. This would break YouTube WatchNext. 
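As a usage sketch of the path grammar above (the step and extension names here are hypothetical; both `path.create_path(...)` and `path.Path([...])` appear in the docstrings earlier in this diff):

```python
from struct2tensor import path

# The dotted-string form and the explicit step-list form describe the same path.
p1 = path.create_path("session.event.val")
p2 = path.Path(["session", "event", "val"])
assert p1 == p2

# An extension step is parenthesized, per the <extension> production above.
p3 = path.create_path("session.(com.example.MyExtension).val")
```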
diff --git a/struct2tensor/prensor.py b/struct2tensor/prensor.py index d80d96d..b41e057 100644 --- a/struct2tensor/prensor.py +++ b/struct2tensor/prensor.py @@ -122,13 +122,16 @@ def get_positional_index(self) -> tf.Tensor: The positional index tells us which index of the parent an element is. - For example, with the following parent indices: [0, 0, 2] + For example, with the following parent indices: `[0, 0, 2]` we would have positional index: + + ```python [ 0, # The 0th element of the 0th parent. 1, # The 1st element of the 0th parent. 0 # The 0th element of the 2nd parent. - ]. + ] + ``` For more information, view ops/run_length_before_op.cc @@ -183,13 +186,16 @@ def get_positional_index(self) -> tf.Tensor: The positional index tells us which index of the parent an element is. - For example, with the following parent indices: [0, 0, 2] + For example, with the following parent indices: `[0, 0, 2]` we would have positional index: + + ```python [ 0, # The 0th element of the 0th parent. 1, # The 1st element of the 0th parent. 0 # The 0th element of the 2nd parent. - ]. + ] + ``` For more information, view ops/run_length_before_op.cc @@ -455,7 +461,7 @@ def get_ragged_tensor( Returns: A ragged tensor containing values of the leaf node, preserving the - structure along the path. Raises an error if the path is not found. + structure along the path. Raises an error if the path is not found. """ return _get_ragged_tensor(self, p, options=options) @@ -476,7 +482,7 @@ def get_sparse_tensor( Returns: A sparse tensor containing values of the leaf node, preserving the - structure along the path. Raises an error if the path is not found. + structure along the path. Raises an error if the path is not found. """ return _get_sparse_tensor(self, p, options=options) diff --git a/struct2tensor/prensor_util.py b/struct2tensor/prensor_util.py index 234e6d6..54cee07 100644 --- a/struct2tensor/prensor_util.py +++ b/struct2tensor/prensor_util.py @@ -47,7 +47,7 @@ def get_sparse_tensor( Returns: A sparse tensor containing values of the leaf node, preserving the - structure along the path. Raises an error if the path is not found. + structure along the path. Raises an error if the path is not found. """ return t.get_sparse_tensor(p, options) @@ -88,7 +88,7 @@ def get_ragged_tensor( Returns: A ragged tensor containing values of the leaf node, preserving the - structure along the path. Raises an error if the path is not found. + structure along the path. Raises an error if the path is not found. """ return t.get_ragged_tensor(p, options)
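Finally, the positional index described in the `prensor.py` hunks above can be illustrated with a small pure-Python sketch; this mimics the documented behavior of `get_positional_index`, not the underlying `run_length_before` op itself:

```python
from collections import Counter

def positional_index(parent_indices):
    """For each element, count how many earlier elements share its parent."""
    seen = Counter()
    out = []
    for parent in parent_indices:
        out.append(seen[parent])  # this element's index within its parent
        seen[parent] += 1
    return out

# Matches the docstring example: parent indices [0, 0, 2] -> positions [0, 1, 0].
assert positional_index([0, 0, 2]) == [0, 1, 0]
```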