From 20c9d0a2dbf5198dfc8611fa2b3ae31eb05d58e2 Mon Sep 17 00:00:00 2001
From: David Cavazos
Date: Thu, 12 Sep 2019 13:38:34 -0700
Subject: [PATCH] [BEAM-7389] Generate notebooks for filter
---
.../python/element-wise/filter-py.ipynb | 521 ++++++++++++++++++
website/notebooks/docs.yaml | 91 +++
website/notebooks/generate.py | 73 +++
website/notebooks/imports/license.md | 19 +
website/notebooks/imports/setup.md | 9 +
.../transforms/python/element-wise/filter.md | 132 ++++-
6 files changed, 821 insertions(+), 24 deletions(-)
create mode 100644 examples/notebooks/documentation/transforms/python/element-wise/filter-py.ipynb
create mode 100644 website/notebooks/docs.yaml
create mode 100644 website/notebooks/generate.py
create mode 100644 website/notebooks/imports/license.md
create mode 100644 website/notebooks/imports/setup.md
diff --git a/examples/notebooks/documentation/transforms/python/element-wise/filter-py.ipynb b/examples/notebooks/documentation/transforms/python/element-wise/filter-py.ipynb
new file mode 100644
index 0000000000000..68d8f380b34f5
--- /dev/null
+++ b/examples/notebooks/documentation/transforms/python/element-wise/filter-py.ipynb
@@ -0,0 +1,521 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github"
+ },
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-the-docs-top"
+ },
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "_-code"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Licensed under the Apache License, Version 2.0 (the \"License\")\n",
+ "# Licensed to the Apache Software Foundation (ASF) under one\n",
+ "# or more contributor license agreements. See the NOTICE file\n",
+ "# distributed with this work for additional information\n",
+ "# regarding copyright ownership. The ASF licenses this file\n",
+ "# to you under the Apache License, Version 2.0 (the\n",
+ "# \"License\"); you may not use this file except in compliance\n",
+ "# with the License. You may obtain a copy of the License at\n",
+ "#\n",
+ "# http://www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing,\n",
+ "# software distributed under the License is distributed on an\n",
+ "# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n",
+ "# KIND, either express or implied. See the License for the\n",
+ "# specific language governing permissions and limitations\n",
+ "# under the License."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "filter"
+ },
+ "source": [
+ "# Filter\n",
+ "\n",
+ "\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "Given a predicate, filter out all elements that don't satisfy that predicate.\n",
+ "May also be used to filter based on an inequality with a given value based\n",
+ "on the comparison ordering of the element."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "setup"
+ },
+ "source": [
+ "## Setup\n",
+ "\n",
+ "First, let's install the `apache-beam` module."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "setup-code"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install --quiet -U apache-beam"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "examples"
+ },
+ "source": [
+ "## Examples\n",
+ "\n",
+ "In the following examples, we create a pipeline with a `PCollection` of produce with their icon, name, and duration.\n",
+ "Then, we apply `Filter` in multiple ways to filter out produce by their duration value.\n",
+ "\n",
+ "`Filter` accepts a function and keeps the elements for which it returns `True`, filtering out the remaining elements."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "example-1-filtering-with-a-function"
+ },
+ "source": [
+ "### Example 1: Filtering with a function\n",
+ "\n",
+ "We define a function `is_perennial` which returns `True` if the element's duration equals `'perennial'`, and `False` otherwise."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "example-1-filtering-with-a-function-code"
+ },
+ "outputs": [],
+ "source": [
+ "import apache_beam as beam\n",
+ "\n",
+ "def is_perennial(plant):\n",
+ " return plant['duration'] == 'perennial'\n",
+ "\n",
+ "with beam.Pipeline() as pipeline:\n",
+ " perennials = (\n",
+ " pipeline\n",
+ " | 'Gardening plants' >> beam.Create([\n",
+ " {'icon': '🍓', 'name': 'Strawberry', 'duration': 'perennial'},\n",
+ " {'icon': '🥕', 'name': 'Carrot', 'duration': 'biennial'},\n",
+ " {'icon': '🍆', 'name': 'Eggplant', 'duration': 'perennial'},\n",
+ " {'icon': '🍅', 'name': 'Tomato', 'duration': 'annual'},\n",
+ " {'icon': '🥔', 'name': 'Potato', 'duration': 'perennial'},\n",
+ " ])\n",
+ " | 'Filter perennials' >> beam.Filter(is_perennial)\n",
+ " | beam.Map(print)\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "example-1-filtering-with-a-function-2"
+ },
+ "source": [
+ "\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "example-2-filtering-with-a-lambda-function"
+ },
+ "source": [
+ "### Example 2: Filtering with a lambda function\n",
+ "\n",
+ "We can also use lambda functions to simplify **Example 1**."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "example-2-filtering-with-a-lambda-function-code"
+ },
+ "outputs": [],
+ "source": [
+ "import apache_beam as beam\n",
+ "\n",
+ "with beam.Pipeline() as pipeline:\n",
+ " perennials = (\n",
+ " pipeline\n",
+ " | 'Gardening plants' >> beam.Create([\n",
+ " {'icon': '🍓', 'name': 'Strawberry', 'duration': 'perennial'},\n",
+ " {'icon': '🥕', 'name': 'Carrot', 'duration': 'biennial'},\n",
+ " {'icon': '🍆', 'name': 'Eggplant', 'duration': 'perennial'},\n",
+ " {'icon': '🍅', 'name': 'Tomato', 'duration': 'annual'},\n",
+ " {'icon': '🥔', 'name': 'Potato', 'duration': 'perennial'},\n",
+ " ])\n",
+ " | 'Filter perennials' >> beam.Filter(\n",
+ " lambda plant: plant['duration'] == 'perennial')\n",
+ " | beam.Map(print)\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "example-2-filtering-with-a-lambda-function-2"
+ },
+ "source": [
+ "\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "example-3-filtering-with-multiple-arguments"
+ },
+ "source": [
+ "### Example 3: Filtering with multiple arguments\n",
+ "\n",
+ "You can pass functions with multiple arguments to `Filter`.\n",
+ "They are passed as additional positional arguments or keyword arguments to the function.\n",
+ "\n",
+ "In this example, `has_duration` takes `plant` and `duration` as arguments."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "example-3-filtering-with-multiple-arguments-code"
+ },
+ "outputs": [],
+ "source": [
+ "import apache_beam as beam\n",
+ "\n",
+ "def has_duration(plant, duration):\n",
+ " return plant['duration'] == duration\n",
+ "\n",
+ "with beam.Pipeline() as pipeline:\n",
+ " perennials = (\n",
+ " pipeline\n",
+ " | 'Gardening plants' >> beam.Create([\n",
+ " {'icon': '🍓', 'name': 'Strawberry', 'duration': 'perennial'},\n",
+ " {'icon': '🥕', 'name': 'Carrot', 'duration': 'biennial'},\n",
+ " {'icon': '🍆', 'name': 'Eggplant', 'duration': 'perennial'},\n",
+ " {'icon': '🍅', 'name': 'Tomato', 'duration': 'annual'},\n",
+ " {'icon': '🥔', 'name': 'Potato', 'duration': 'perennial'},\n",
+ " ])\n",
+ " | 'Filter perennials' >> beam.Filter(has_duration, 'perennial')\n",
+ " | beam.Map(print)\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "example-3-filtering-with-multiple-arguments-2"
+ },
+ "source": [
+ "\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "example-4-filtering-with-side-inputs-as-singletons"
+ },
+ "source": [
+ "### Example 4: Filtering with side inputs as singletons\n",
+ "\n",
+ "If the `PCollection` has a single value, such as the average from another computation,\n",
+ "passing the `PCollection` as a *singleton* accesses that value.\n",
+ "\n",
+ "In this example, we pass a `PCollection` containing the value `'perennial'` as a singleton.\n",
+ "We then use that value to keep only the perennials."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "example-4-filtering-with-side-inputs-as-singletons-code"
+ },
+ "outputs": [],
+ "source": [
+ "import apache_beam as beam\n",
+ "\n",
+ "with beam.Pipeline() as pipeline:\n",
+ " perennial = pipeline | 'Perennial' >> beam.Create(['perennial'])\n",
+ "\n",
+ " perennials = (\n",
+ " pipeline\n",
+ " | 'Gardening plants' >> beam.Create([\n",
+ " {'icon': '🍓', 'name': 'Strawberry', 'duration': 'perennial'},\n",
+ " {'icon': '🥕', 'name': 'Carrot', 'duration': 'biennial'},\n",
+ " {'icon': '🍆', 'name': 'Eggplant', 'duration': 'perennial'},\n",
+ " {'icon': '🍅', 'name': 'Tomato', 'duration': 'annual'},\n",
+ " {'icon': '🥔', 'name': 'Potato', 'duration': 'perennial'},\n",
+ " ])\n",
+ " | 'Filter perennials' >> beam.Filter(\n",
+ " lambda plant, duration: plant['duration'] == duration,\n",
+ " duration=beam.pvalue.AsSingleton(perennial),\n",
+ " )\n",
+ " | beam.Map(print)\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "example-4-filtering-with-side-inputs-as-singletons-2"
+ },
+ "source": [
+ "\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "example-5-filtering-with-side-inputs-as-iterators"
+ },
+ "source": [
+ "### Example 5: Filtering with side inputs as iterators\n",
+ "\n",
+ "If the `PCollection` has multiple values, pass the `PCollection` as an *iterator*.\n",
+ "This accesses elements lazily as they are needed,\n",
+ "so it is possible to iterate over large `PCollection`s that won't fit into memory."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "example-5-filtering-with-side-inputs-as-iterators-code"
+ },
+ "outputs": [],
+ "source": [
+ "import apache_beam as beam\n",
+ "\n",
+ "with beam.Pipeline() as pipeline:\n",
+ " valid_durations = pipeline | 'Valid durations' >> beam.Create([\n",
+ " 'annual',\n",
+ " 'biennial',\n",
+ " 'perennial',\n",
+ " ])\n",
+ "\n",
+ " valid_plants = (\n",
+ " pipeline\n",
+ " | 'Gardening plants' >> beam.Create([\n",
+ " {'icon': '🍓', 'name': 'Strawberry', 'duration': 'perennial'},\n",
+ " {'icon': '🥕', 'name': 'Carrot', 'duration': 'biennial'},\n",
+ " {'icon': '🍆', 'name': 'Eggplant', 'duration': 'perennial'},\n",
+ " {'icon': '🍅', 'name': 'Tomato', 'duration': 'annual'},\n",
+ " {'icon': '🥔', 'name': 'Potato', 'duration': 'PERENNIAL'},\n",
+ " ])\n",
+ " | 'Filter valid plants' >> beam.Filter(\n",
+ " lambda plant, valid_durations: plant['duration'] in valid_durations,\n",
+ " valid_durations=beam.pvalue.AsIter(valid_durations),\n",
+ " )\n",
+ " | beam.Map(print)\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "example-5-filtering-with-side-inputs-as-iterators-2"
+ },
+ "source": [
+ "\n",
+ "
\n",
+ "\n",
+ "> **Note**: You can pass the `PCollection` as a *list* with `beam.pvalue.AsList(pcollection)`,\n",
+ "> but this requires that all the elements fit into memory."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "example-6-filtering-with-side-inputs-as-dictionaries"
+ },
+ "source": [
+ "### Example 6: Filtering with side inputs as dictionaries\n",
+ "\n",
+ "If a `PCollection` is small enough to fit into memory, then that `PCollection` can be passed as a *dictionary*.\n",
+ "Each element must be a `(key, value)` pair.\n",
+ "Note that all the elements of the `PCollection` must fit into memory for this.\n",
+ "If the `PCollection` won't fit into memory, use `beam.pvalue.AsIter(pcollection)` instead."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "example-6-filtering-with-side-inputs-as-dictionaries-code"
+ },
+ "outputs": [],
+ "source": [
+ "import apache_beam as beam\n",
+ "\n",
+ "with beam.Pipeline() as pipeline:\n",
+ " keep_duration = pipeline | 'Duration filters' >> beam.Create([\n",
+ " ('annual', False),\n",
+ " ('biennial', False),\n",
+ " ('perennial', True),\n",
+ " ])\n",
+ "\n",
+ " perennials = (\n",
+ " pipeline\n",
+ " | 'Gardening plants' >> beam.Create([\n",
+ " {'icon': '🍓', 'name': 'Strawberry', 'duration': 'perennial'},\n",
+ " {'icon': '🥕', 'name': 'Carrot', 'duration': 'biennial'},\n",
+ " {'icon': '🍆', 'name': 'Eggplant', 'duration': 'perennial'},\n",
+ " {'icon': '🍅', 'name': 'Tomato', 'duration': 'annual'},\n",
+ " {'icon': '🥔', 'name': 'Potato', 'duration': 'perennial'},\n",
+ " ])\n",
+ " | 'Filter plants by duration' >> beam.Filter(\n",
+ " lambda plant, keep_duration: keep_duration[plant['duration']],\n",
+ " keep_duration=beam.pvalue.AsDict(keep_duration),\n",
+ " )\n",
+ " | beam.Map(print)\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "example-6-filtering-with-side-inputs-as-dictionaries-2"
+ },
+ "source": [
+ "\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "related-transforms"
+ },
+ "source": [
+ "## Related transforms\n",
+ "\n",
+ "* [FlatMap](https://beam.apache.org/documentation/transforms/python/elementwise/flatmap) behaves the same as `Map`, but for\n",
+ " each input it might produce zero or more outputs.\n",
+ "* [ParDo](https://beam.apache.org/documentation/transforms/python/elementwise/pardo) is the most general element-wise mapping\n",
+ " operation, and includes other abilities such as multiple output collections and side-inputs.\n",
+ "\n",
+ "\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-the-docs-bottom"
+ },
+ "source": [
+ ""
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "name": "Filter - element-wise transform",
+ "toc_visible": true
+ },
+ "kernelspec": {
+ "display_name": "python3",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/website/notebooks/docs.yaml b/website/notebooks/docs.yaml
new file mode 100644
index 0000000000000..99b0a4a4616cc
--- /dev/null
+++ b/website/notebooks/docs.yaml
@@ -0,0 +1,91 @@
+# Python transform catalog
+documentation/transforms/python/element-wise/filter:
+  title: Filter - element-wise transform
+  languages: py
+  imports:
+    0: [license.md]
+    1: [setup.md]
+
+# documentation/transforms/python/element-wise/flatmap:
+# title: FlatMap - element-wise transform
+# languages: py
+# imports:
+# 0: [license.md]
+# 1: [setup.md]
+
+# documentation/transforms/python/element-wise/keys:
+# title: Keys - element-wise transform
+# languages: py
+# imports:
+# 0: [license.md]
+# 1: [setup.md]
+
+# documentation/transforms/python/element-wise/kvswap:
+# title: KvSwap - element-wise transform
+# languages: py
+# imports:
+# 0: [license.md]
+# 1: [setup.md]
+
+# documentation/transforms/python/element-wise/map:
+# title: Map - element-wise transform
+# languages: py
+# imports:
+# 0: [license.md]
+# 1: [setup.md]
+
+# documentation/transforms/python/element-wise/pardo:
+# title: ParDo - element-wise transform
+# languages: py
+# imports:
+# 0: [license.md]
+# 1: [setup.md]
+
+# documentation/transforms/python/element-wise/partition:
+# title: Partition - element-wise transform
+# languages: py
+# imports:
+# 0: [license.md]
+# 1: [setup.md]
+
+# documentation/transforms/python/element-wise/regex:
+# title: Regex - element-wise transform
+# languages: py
+# imports:
+# 0: [license.md]
+# 1: [setup.md]
+
+# documentation/transforms/python/element-wise/reify:
+# title: Reify - element-wise transform
+# languages: py
+# imports:
+# 0: [license.md]
+# 1: [setup.md]
+
+# documentation/transforms/python/element-wise/tostring:
+# title: ToString - element-wise transform
+# languages: py
+# imports:
+# 0: [license.md]
+# 1: [setup.md]
+
+# documentation/transforms/python/element-wise/values:
+# title: Values - element-wise transform
+# languages: py
+# imports:
+# 0: [license.md]
+# 1: [setup.md]
+
+# documentation/transforms/python/element-wise/withkeys:
+# title: WithKeys - element-wise transform
+# languages: py
+# imports:
+# 0: [license.md]
+# 1: [setup.md]
+
+# documentation/transforms/python/element-wise/withtimestamps:
+# title: WithTimestamps - element-wise transform
+# languages: py
+# imports:
+# 0: [license.md]
+# 1: [setup.md]
diff --git a/website/notebooks/generate.py b/website/notebooks/generate.py
new file mode 100644
index 0000000000000..ed9c029bb9d20
--- /dev/null
+++ b/website/notebooks/generate.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# To run, you will have to install `md2ipynb`.
+# pip install -U md2ipynb
+
+import argparse
+import md2ipynb
+import nbformat
+import os
+import yaml
+
+docs_logo_url = 'https://beam.apache.org/images/logos/full-color/name-bottom/beam-logo-full-color-name-bottom-100.png'
+
+
+def run(docs, variables=None, inputs_dir='.', outputs_dir='.', imports_dir='.'):
+  """Generates one notebook per language for every entry in `docs`.
+
+  Args:
+    docs: Mapping of Markdown base names (paths relative to `inputs_dir`,
+      without the `.md` extension) to their settings. The settings read here
+      are `languages` (space-separated string, defaults to 'py java go'),
+      `imports` (mapping whose values are lists of file names relative to
+      `imports_dir`; the keys are handed to `md2ipynb` unchanged) and `title`
+      (defaults to the file name with '-' replaced by spaces).
+    variables: Passed through to `md2ipynb.new_notebook` unchanged.
+    inputs_dir: Directory containing the Markdown sources.
+    outputs_dir: Directory where `<basename>-<lang>.ipynb` is written. The
+      resulting path is also appended verbatim to the GitHub URL embedded in
+      the notebook, so it should be relative to the repository root.
+    imports_dir: Directory containing the files listed under `imports`.
+  """
+  for basename, doc in docs.items():
+    languages = doc.get('languages', 'py java go').split()
+    for lang in languages:
+      # This path is both the output location and the suffix of the GitHub URL
+      # passed as `github_ipynb_url` below.
+      ipynb_file = '/'.join([outputs_dir, '{}-{}.ipynb'.format(basename, lang)])
+      notebook = md2ipynb.new_notebook(
+          input_file=os.path.join(inputs_dir, basename + '.md'),
+          variables=variables,
+          imports={
+              i: [os.path.join(imports_dir, path) for path in imports]
+              for i, imports in doc.get('imports', {}).items()
+          },
+          notebook_title=doc.get('title', os.path.basename(basename).replace('-', ' ')),
+          keep_classes=['language-' + lang, 'shell-sh'],
+          # The docs URL is the base name with every '-' removed.
+          docs_url='https://beam.apache.org/' + basename.replace('-', ''),
+          docs_logo_url=docs_logo_url,
+          github_ipynb_url='https://github.com/apache/beam/blob/master/' + ipynb_file,
+      )
+      output_dir = os.path.dirname(ipynb_file)
+      if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+      # Hand nbformat the path instead of a locale-encoded file object: it then
+      # opens the file as UTF-8 itself, which the notebooks need since they
+      # contain non-ASCII characters such as emoji.
+      nbformat.write(notebook, ipynb_file)
+
+
+if __name__ == '__main__':
+  # Locate the repository root from this script's location
+  # (<root>/website/notebooks/generate.py).
+  script_dir = os.path.dirname(os.path.realpath(__file__))
+  root_dir = os.path.realpath(os.path.join(script_dir, '..', '..'))
+
+  # `safe_load` only builds plain Python objects and, unlike a bare
+  # `yaml.load`, does not require an explicit Loader argument.
+  docs_file = os.path.join(script_dir, 'docs.yaml')
+  with open(docs_file) as f:
+    docs = yaml.safe_load(f)
+
+  variables_file = os.path.join(root_dir, 'website', '_config.yml')
+  with open(variables_file) as f:
+    variables = {'site': yaml.safe_load(f)}
+  # Reuse the site's `url` as its `baseurl`.
+  # NOTE(review): presumably so links in the notebooks are absolute -- confirm.
+  variables['site']['baseurl'] = variables['site']['url']
+
+  inputs_dir = os.path.join(root_dir, 'website', 'src')
+  imports_dir = os.path.join(script_dir, 'imports')
+  # `run` appends `outputs_dir` to the GitHub URL it embeds in each notebook,
+  # so pass it relative to the repository root and run from there; an absolute
+  # path would leak the local checkout location into the generated links.
+  os.chdir(root_dir)
+  outputs_dir = 'examples/notebooks'
+  run(docs, variables, inputs_dir, outputs_dir, imports_dir)
diff --git a/website/notebooks/imports/license.md b/website/notebooks/imports/license.md
new file mode 100644
index 0000000000000..a05bcd85152f3
--- /dev/null
+++ b/website/notebooks/imports/license.md
@@ -0,0 +1,19 @@
+```
+#@title Licensed under the Apache License, Version 2.0 (the "License")
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+```
diff --git a/website/notebooks/imports/setup.md b/website/notebooks/imports/setup.md
new file mode 100644
index 0000000000000..17f862b51b22d
--- /dev/null
+++ b/website/notebooks/imports/setup.md
@@ -0,0 +1,9 @@
+## Setup
+
+{:.language-py}
+First, let's install the `apache-beam` module.
+
+{:.language-py}
+```sh
+pip install --quiet -U apache-beam
+```
diff --git a/website/src/documentation/transforms/python/element-wise/filter.md b/website/src/documentation/transforms/python/element-wise/filter.md
index 5318f20ac4aa8..23e3af671b545 100644
--- a/website/src/documentation/transforms/python/element-wise/filter.md
+++ b/website/src/documentation/transforms/python/element-wise/filter.md
@@ -54,18 +54,32 @@ We define a function `is_perennial` which returns `True` if the element's durati
```py
{% github_sample /apache/beam/blob/master/sdks/python/apache_beam/examples/snippets/transforms/element_wise/filter.py tag:filter_function %}```
+{:.notebook-skip}
Output `PCollection` after `Filter`:
+{:.notebook-skip}
```
{% github_sample /apache/beam/blob/master/sdks/python/apache_beam/examples/snippets/transforms/element_wise/filter_test.py tag:perennials %}```
-
+{:.notebook-skip}
+
+
+
@@ -78,18 +92,32 @@ We can also use lambda functions to simplify **Example 1**.
```py
{% github_sample /apache/beam/blob/master/sdks/python/apache_beam/examples/snippets/transforms/element_wise/filter.py tag:filter_lambda %}```
+{:.notebook-skip}
Output `PCollection` after `Filter`:
+{:.notebook-skip}
```
{% github_sample /apache/beam/blob/master/sdks/python/apache_beam/examples/snippets/transforms/element_wise/filter_test.py tag:perennials %}```
-
+{:.notebook-skip}
+
+
+
@@ -105,18 +133,32 @@ In this example, `has_duration` takes `plant` and `duration` as arguments.
```py
{% github_sample /apache/beam/blob/master/sdks/python/apache_beam/examples/snippets/transforms/element_wise/filter.py tag:filter_multiple_arguments %}```
+{:.notebook-skip}
Output `PCollection` after `Filter`:
+{:.notebook-skip}
```
{% github_sample /apache/beam/blob/master/sdks/python/apache_beam/examples/snippets/transforms/element_wise/filter_test.py tag:perennials %}```
-
+{:.notebook-skip}
+
+
+
@@ -133,18 +175,32 @@ We then use that value to filter out perennials.
```py
{% github_sample /apache/beam/blob/master/sdks/python/apache_beam/examples/snippets/transforms/element_wise/filter.py tag:filter_side_inputs_singleton %}```
+{:.notebook-skip}
Output `PCollection` after `Filter`:
+{:.notebook-skip}
```
{% github_sample /apache/beam/blob/master/sdks/python/apache_beam/examples/snippets/transforms/element_wise/filter_test.py tag:perennials %}```
-
+{:.notebook-skip}
+
+
+
@@ -159,18 +215,32 @@ so it is possible to iterate over large `PCollection`s that won't fit into memor
```py
{% github_sample /apache/beam/blob/master/sdks/python/apache_beam/examples/snippets/transforms/element_wise/filter.py tag:filter_side_inputs_iter %}```
+{:.notebook-skip}
Output `PCollection` after `Filter`:
+{:.notebook-skip}
```
{% github_sample /apache/beam/blob/master/sdks/python/apache_beam/examples/snippets/transforms/element_wise/filter_test.py tag:valid_plants %}```
-
+{:.notebook-skip}
+
+
+
@@ -189,18 +259,32 @@ If the `PCollection` won't fit into memory, use `beam.pvalue.AsIter(pcollection)
```py
{% github_sample /apache/beam/blob/master/sdks/python/apache_beam/examples/snippets/transforms/element_wise/filter.py tag:filter_side_inputs_dict %}```
+{:.notebook-skip}
Output `PCollection` after `Filter`:
+{:.notebook-skip}
```
{% github_sample /apache/beam/blob/master/sdks/python/apache_beam/examples/snippets/transforms/element_wise/filter_test.py tag:perennials %}```
-