Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DebuggingWordCount now takes filter as an option #108

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@
*/
package com.google.cloud.dataflow.examples;

import com.google.cloud.dataflow.examples.WordCount.WordCountOptions;
import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.io.TextIO;
import com.google.cloud.dataflow.sdk.options.Default;
import com.google.cloud.dataflow.sdk.options.Description;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
import com.google.cloud.dataflow.sdk.transforms.Aggregator;
Expand Down Expand Up @@ -151,6 +152,21 @@ public void processElement(ProcessContext c) {
}
}

/**
 * Pipeline options for {@link DebuggingWordCount}.
 *
 * <p>Extends {@link WordCount.WordCountOptions}, so every option declared there is
 * available here as well, and adds a configurable word filter pattern.
 */
public interface WordCountOptions extends WordCount.WordCountOptions {

  /**
   * Returns the regex filter pattern; only words matching it are counted.
   * When not set explicitly, the default is {@code Flourish|stomach}.
   */
  @Default.String("Flourish|stomach")
  @Description("Regex filter pattern to use in DebuggingWordCount. "
      + "Only words matching this pattern will be counted.")
  String getFilterPattern();

  /** Sets the regex filter pattern returned by {@link #getFilterPattern()}. */
  void setFilterPattern(String value);
}

public static void main(String[] args) {
WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
.as(WordCountOptions.class);
Expand All @@ -159,7 +175,7 @@ public static void main(String[] args) {
PCollection<KV<String, Long>> filteredWords =
p.apply(TextIO.Read.named("ReadLines").from(options.getInputFile()))
.apply(new WordCount.CountWords())
.apply(ParDo.of(new FilterTextFn("Flourish|stomach")));
.apply(ParDo.of(new FilterTextFn(options.getFilterPattern())));

/**
* Concept #4: DataflowAssert is a set of convenient PTransforms in the style of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@
*/
package ${package};

import ${package}.WordCount.WordCountOptions;
import ${package}.WordCount;
import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.io.TextIO;
import com.google.cloud.dataflow.sdk.options.Default;
import com.google.cloud.dataflow.sdk.options.Description;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
import com.google.cloud.dataflow.sdk.transforms.Aggregator;
Expand Down Expand Up @@ -150,6 +152,21 @@ public void processElement(ProcessContext c) {
}
}
}

/**
 * Pipeline options for {@link DebuggingWordCount}.
 *
 * <p>Extends {@link WordCount.WordCountOptions}, so every option declared there is
 * available here as well, and adds a configurable word filter pattern.
 */
public interface WordCountOptions extends WordCount.WordCountOptions {

  /**
   * Returns the regex filter pattern; only words matching it are counted.
   * When not set explicitly, the default is {@code Flourish|stomach}.
   */
  @Default.String("Flourish|stomach")
  @Description("Regex filter pattern to use in DebuggingWordCount. "
      + "Only words matching this pattern will be counted.")
  String getFilterPattern();

  /** Sets the regex filter pattern returned by {@link #getFilterPattern()}. */
  void setFilterPattern(String value);
}

public static void main(String[] args) {
WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
Expand All @@ -159,7 +176,7 @@ public static void main(String[] args) {
PCollection<KV<String, Long>> filteredWords =
p.apply(TextIO.Read.named("ReadLines").from(options.getInputFile()))
.apply(new WordCount.CountWords())
.apply(ParDo.of(new FilterTextFn("Flourish|stomach")));
.apply(ParDo.of(new FilterTextFn(options.getFilterPattern())));

/**
* Concept #4: DataflowAssert is a set of convenient PTransforms in the style of
Expand Down