From 374c6477f053b67e3edb4743168f82157628c558 Mon Sep 17 00:00:00 2001 From: bchambers Date: Thu, 31 Mar 2016 15:18:09 -0700 Subject: [PATCH 1/2] DebuggingWordCount now takes filter as an option Previously it was hard-coded as "Flourish|stomach". Now it is a PipelineOption with that as the default. This allows "breaking" the pipeline by mis-specifying the pattern without changing the code. --- .../google/cloud/dataflow/examples/DebuggingWordCount.java | 2 +- .../java/com/google/cloud/dataflow/examples/WordCount.java | 6 ++++++ .../src/main/java/DebuggingWordCount.java | 2 +- .../archetype-resources/src/main/java/WordCount.java | 6 ++++++ 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java index 1f7618131f4ef..62361999c3f41 100644 --- a/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java +++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java @@ -159,7 +159,7 @@ public static void main(String[] args) { PCollection<KV<String, Long>> filteredWords = p.apply(TextIO.Read.named("ReadLines").from(options.getInputFile())) .apply(new WordCount.CountWords()) - .apply(ParDo.of(new FilterTextFn("Flourish|stomach"))); + .apply(ParDo.of(new FilterTextFn(options.getFilterPattern()))); /** * Concept #4: DataflowAssert is a set of convenient PTransforms in the style of diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/WordCount.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/WordCount.java index ad08d13a198f2..94dacedfc32c8 100644 --- a/examples/java/src/main/java/com/google/cloud/dataflow/examples/WordCount.java +++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/WordCount.java @@ -172,6 +172,12 @@ public static interface WordCountOptions extends PipelineOptions { String 
getOutput(); void setOutput(String value); + @Description("Regex filter pattern to use in DebuggingWordCount. " + + "Only words matching this pattern will be counted.") + @Default.String("Flourish|stomach") + String getFilterPattern(); + void setFilterPattern(String value); + /** * Returns "gs://${YOUR_STAGING_DIRECTORY}/counts.txt" as the default destination. */ diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java index 905670faa7777..4e97e9910cdaa 100644 --- a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java +++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java @@ -159,7 +159,7 @@ public static void main(String[] args) { PCollection<KV<String, Long>> filteredWords = p.apply(TextIO.Read.named("ReadLines").from(options.getInputFile())) .apply(new WordCount.CountWords()) - .apply(ParDo.of(new FilterTextFn("Flourish|stomach"))); + .apply(ParDo.of(new FilterTextFn(options.getFilterPattern()))); /** * Concept #4: DataflowAssert is a set of convenient PTransforms in the style of diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java index 58cc04c635bb9..40ebdd4e0fc45 100644 --- a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java +++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java @@ -169,6 +169,12 @@ public static interface WordCountOptions extends PipelineOptions { @Default.InstanceFactory(OutputFactory.class) String getOutput(); void setOutput(String value); + + @Description("Regex filter pattern to 
use in DebuggingWordCount. " + + "Only words matching this pattern will be counted.") + @Default.String("Flourish|stomach") + String getFilterPattern(); + void setFilterPattern(String value); /** * Returns "gs://${YOUR_STAGING_DIRECTORY}/counts.txt" as the default destination. From 7d3ee15521ead5659275de59586b8f90dc613f8e Mon Sep 17 00:00:00 2001 From: bchambers Date: Thu, 31 Mar 2016 17:32:57 -0700 Subject: [PATCH 2/2] fixup --- .../dataflow/examples/DebuggingWordCount.java | 18 +++++++++++++++++- .../cloud/dataflow/examples/WordCount.java | 6 ------ .../src/main/java/DebuggingWordCount.java | 19 ++++++++++++++++++- .../src/main/java/WordCount.java | 6 ------ 4 files changed, 35 insertions(+), 14 deletions(-) diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java index 62361999c3f41..331d7c6f46929 100644 --- a/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java +++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java @@ -17,9 +17,10 @@ */ package com.google.cloud.dataflow.examples; -import com.google.cloud.dataflow.examples.WordCount.WordCountOptions; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.Description; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.testing.DataflowAssert; import com.google.cloud.dataflow.sdk.transforms.Aggregator; @@ -151,6 +152,21 @@ public void processElement(ProcessContext c) { } } + /** + * Options supported by {@link DebuggingWordCount}. + * + *

<p>Inherits standard configuration options and all options defined in + * {@link WordCount.WordCountOptions}. + */ + public static interface WordCountOptions extends WordCount.WordCountOptions { + + @Description("Regex filter pattern to use in DebuggingWordCount. " + + "Only words matching this pattern will be counted.") + @Default.String("Flourish|stomach") + String getFilterPattern(); + void setFilterPattern(String value); + } + public static void main(String[] args) { WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation() .as(WordCountOptions.class); diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/WordCount.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/WordCount.java index 94dacedfc32c8..ad08d13a198f2 100644 --- a/examples/java/src/main/java/com/google/cloud/dataflow/examples/WordCount.java +++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/WordCount.java @@ -172,12 +172,6 @@ public static interface WordCountOptions extends PipelineOptions { String getOutput(); void setOutput(String value); - @Description("Regex filter pattern to use in DebuggingWordCount. " - + "Only words matching this pattern will be counted.") - @Default.String("Flourish|stomach") - String getFilterPattern(); - void setFilterPattern(String value); - /** * Returns "gs://${YOUR_STAGING_DIRECTORY}/counts.txt" as the default destination. 
*/ diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java index 4e97e9910cdaa..32fca4e4f89a2 100644 --- a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java +++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java @@ -17,9 +17,11 @@ */ package ${package}; -import ${package}.WordCount.WordCountOptions; +import ${package}.WordCount; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.Description; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.testing.DataflowAssert; import com.google.cloud.dataflow.sdk.transforms.Aggregator; @@ -150,6 +152,21 @@ public void processElement(ProcessContext c) { } } } + + /** + * Options supported by {@link DebuggingWordCount}. + * + *

<p>Inherits standard configuration options and all options defined in + * {@link WordCount.WordCountOptions}. + */ + public static interface WordCountOptions extends WordCount.WordCountOptions { + + @Description("Regex filter pattern to use in DebuggingWordCount. " + + "Only words matching this pattern will be counted.") + @Default.String("Flourish|stomach") + String getFilterPattern(); + void setFilterPattern(String value); + } public static void main(String[] args) { WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation() diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java index 40ebdd4e0fc45..58cc04c635bb9 100644 --- a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java +++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/WordCount.java @@ -169,12 +169,6 @@ public static interface WordCountOptions extends PipelineOptions { @Default.InstanceFactory(OutputFactory.class) String getOutput(); void setOutput(String value); - - @Description("Regex filter pattern to use in DebuggingWordCount. " - + "Only words matching this pattern will be counted.") - @Default.String("Flourish|stomach") - String getFilterPattern(); - void setFilterPattern(String value); /** * Returns "gs://${YOUR_STAGING_DIRECTORY}/counts.txt" as the default destination.