From 368792917bfcadef82a2d767aca51c0da8bbdf31 Mon Sep 17 00:00:00 2001
From: Sela
Date: Sat, 12 Mar 2016 00:37:55 +0200
Subject: [PATCH 01/10] [BEAM-11] Spark runner directory structure and pom setup.

---
 runners/spark/pom.xml | 272 +++++++++----------
 .../beam/runners}/spark/DoFnFunction.java | 3 +-
 .../runners}/spark/EvaluationContext.java | 3 +-
 .../beam/runners}/spark/EvaluationResult.java | 2 +-
 .../runners}/spark/MultiDoFnFunction.java | 3 +-
 .../runners}/spark/SparkContextFactory.java | 2 +-
 .../spark/SparkPipelineEvaluator.java | 2 +-
 .../runners}/spark/SparkPipelineOptions.java | 2 +-
 .../spark/SparkPipelineOptionsFactory.java | 2 +-
 .../spark/SparkPipelineOptionsRegistrar.java | 2 +-
 .../runners}/spark/SparkPipelineRunner.java | 12 +-
 .../spark/SparkPipelineRunnerRegistrar.java | 2 +-
 .../spark/SparkPipelineTranslator.java | 2 +-
 .../runners}/spark/SparkProcessContext.java | 9 +-
 .../runners}/spark/SparkRuntimeContext.java | 8 +-
 .../runners}/spark/TransformEvaluator.java | 2 +-
 .../runners}/spark/TransformTranslator.java | 21 +-
 .../beam/runners}/spark/WindowingHelpers.java | 2 +-
 .../spark/aggregators/AggAccumParam.java | 2 +-
 .../spark/aggregators/NamedAggregators.java | 4 +-
 .../runners/spark/coders}/CoderHelpers.java | 19 +-
 .../spark/coders}/NullWritableCoder.java | 2 +-
 .../runners/spark/coders}/WritableCoder.java | 2 +-
 .../beam/runners/spark}/io/ConsoleIO.java | 2 +-
 .../beam/runners/spark}/io/CreateStream.java | 2 +-
 .../beam/runners/spark}/io/KafkaIO.java | 2 +-
 .../runners/spark/io}/hadoop/HadoopIO.java | 4 +-
 .../spark/io/hadoop}/ShardNameBuilder.java | 4 +-
 .../io/hadoop}/ShardNameTemplateAware.java | 4 +-
 .../io/hadoop}/ShardNameTemplateHelper.java | 6 +-
 .../hadoop}/TemplatedAvroKeyOutputFormat.java | 2 +-
 .../TemplatedSequenceFileOutputFormat.java | 2 +-
 .../io/hadoop}/TemplatedTextOutputFormat.java | 2 +-
 .../SparkStreamingPipelineOptions.java | 4 +-
 .../SparkStreamingPipelineOptionsFactory.java | 2 +-
 ...parkStreamingPipelineOptionsRegistrar.java | 2 +-
 .../streaming/StreamingEvaluationContext.java | 8 +-
 .../StreamingTransformTranslator.java | 25 +-
 .../StreamingWindowPipelineDetector.java | 9 +-
 .../runners/spark/util}/BroadcastHelper.java | 9 +-
 .../beam/runners/spark/util}/ByteArray.java | 6 +-
 ...aflow.sdk.options.PipelineOptionsRegistrar | 4 +-
 ...taflow.sdk.runners.PipelineRunnerRegistrar | 2 +-
 .../runners}/spark/CombineGloballyTest.java | 5 +-
 .../runners}/spark/CombinePerKeyTest.java | 8 +-
 .../apache/beam/runners}/spark/DeDupTest.java | 5 +-
 .../beam/runners}/spark/DoFnOutputTest.java | 5 +-
 .../beam/runners}/spark/EmptyInputTest.java | 5 +-
 .../spark/MultiOutputWordCountTest.java | 22 +-
 .../runners}/spark/SerializationTest.java | 14 +-
 .../beam/runners}/spark/SideEffectsTest.java | 11 +-
 .../runners}/spark/SimpleWordCountTest.java | 14 +-
 .../TestSparkPipelineOptionsFactory.java | 2 +-
 .../apache/beam/runners}/spark/TfIdfTest.java | 5 +-
 .../spark/TransformTranslatorTest.java | 4 +-
 .../runners}/spark/WindowedWordCountTest.java | 3 +-
 .../spark/coders}/WritableCoderTest.java | 2 +-
 .../runners/spark/io}/AvroPipelineTest.java | 4 +-
 .../beam/runners/spark/io}/NumShardsTest.java | 16 +-
 .../hadoop}/HadoopFileFormatPipelineTest.java | 10 +-
 .../io/hadoop}/ShardNameBuilderTest.java | 12 +-
 .../spark/streaming/FlattenStreamingTest.java | 10 +-
 .../spark/streaming/KafkaStreamingTest.java | 20 +-
 .../SimpleStreamingWordCountTest.java | 12 +-
 .../utils/DataflowAssertStreaming.java | 4 +-
 .../streaming/utils/EmbeddedKafkaCluster.java | 2 +-
 66 files changed, 335 insertions(+), 341 deletions(-)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/DoFnFunction.java (96%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/EvaluationContext.java (99%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/EvaluationResult.java (98%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/MultiDoFnFunction.java (97%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/SparkContextFactory.java (98%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/SparkPipelineEvaluator.java (98%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/SparkPipelineOptions.java (97%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/SparkPipelineOptionsFactory.java (95%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/SparkPipelineOptionsRegistrar.java (96%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/SparkPipelineRunner.java (96%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/SparkPipelineRunnerRegistrar.java (96%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/SparkPipelineTranslator.java (95%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/SparkProcessContext.java (96%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/SparkRuntimeContext.java (97%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/TransformEvaluator.java (95%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/TransformTranslator.java (97%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/WindowingHelpers.java (97%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/aggregators/AggAccumParam.java (95%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/aggregators/NamedAggregators.java (98%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow/spark => org/apache/beam/runners/spark/coders}/CoderHelpers.java (89%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow/hadoop => org/apache/beam/runners/spark/coders}/NullWritableCoder.java (97%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow/hadoop => org/apache/beam/runners/spark/coders}/WritableCoder.java (98%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners/spark}/io/ConsoleIO.java (97%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners/spark}/io/CreateStream.java (98%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners/spark}/io/KafkaIO.java (99%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners/spark/io}/hadoop/HadoopIO.java (98%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow/spark => org/apache/beam/runners/spark/io/hadoop}/ShardNameBuilder.java (97%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow/spark => org/apache/beam/runners/spark/io/hadoop}/ShardNameTemplateAware.java (90%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow/spark => org/apache/beam/runners/spark/io/hadoop}/ShardNameTemplateHelper.java (93%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow/spark => org/apache/beam/runners/spark/io/hadoop}/TemplatedAvroKeyOutputFormat.java (96%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow/spark => org/apache/beam/runners/spark/io/hadoop}/TemplatedSequenceFileOutputFormat.java (96%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow/spark => org/apache/beam/runners/spark/io/hadoop}/TemplatedTextOutputFormat.java (96%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/streaming/SparkStreamingPipelineOptions.java (91%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/streaming/SparkStreamingPipelineOptionsFactory.java (94%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/streaming/SparkStreamingPipelineOptionsRegistrar.java (95%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/streaming/StreamingEvaluationContext.java (97%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/streaming/StreamingTransformTranslator.java (96%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/streaming/StreamingWindowPipelineDetector.java (94%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow/spark => org/apache/beam/runners/spark/util}/BroadcastHelper.java (92%)
 rename runners/spark/src/main/java/{com/cloudera/dataflow/spark => org/apache/beam/runners/spark/util}/ByteArray.java (89%)
 rename runners/spark/src/test/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/CombineGloballyTest.java (98%)
 rename runners/spark/src/test/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/CombinePerKeyTest.java (89%)
 rename runners/spark/src/test/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/DeDupTest.java (98%)
 rename runners/spark/src/test/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/DoFnOutputTest.java (98%)
 rename runners/spark/src/test/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/EmptyInputTest.java (98%)
 rename runners/spark/src/test/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/MultiOutputWordCountTest.java (83%)
 rename runners/spark/src/test/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/SerializationTest.java (93%)
 rename runners/spark/src/test/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/SideEffectsTest.java (93%)
 rename runners/spark/src/test/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/SimpleWordCountTest.java (89%)
 rename runners/spark/src/test/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/TestSparkPipelineOptionsFactory.java (96%)
 rename runners/spark/src/test/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/TfIdfTest.java (98%)
 rename runners/spark/src/test/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/TransformTranslatorTest.java (98%)
 rename runners/spark/src/test/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/WindowedWordCountTest.java (98%)
 rename runners/spark/src/test/java/{com/cloudera/dataflow/hadoop => org/apache/beam/runners/spark/coders}/WritableCoderTest.java (96%)
 rename runners/spark/src/test/java/{com/cloudera/dataflow/spark =>
org/apache/beam/runners/spark/io}/AvroPipelineTest.java (96%) rename runners/spark/src/test/java/{com/cloudera/dataflow/spark => org/apache/beam/runners/spark/io}/NumShardsTest.java (91%) rename runners/spark/src/test/java/{com/cloudera/dataflow/spark => org/apache/beam/runners/spark/io/hadoop}/HadoopFileFormatPipelineTest.java (95%) rename runners/spark/src/test/java/{com/cloudera/dataflow/spark => org/apache/beam/runners/spark/io/hadoop}/ShardNameBuilderTest.java (86%) rename runners/spark/src/test/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/streaming/FlattenStreamingTest.java (91%) rename runners/spark/src/test/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/streaming/KafkaStreamingTest.java (88%) rename runners/spark/src/test/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/streaming/SimpleStreamingWordCountTest.java (88%) rename runners/spark/src/test/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/streaming/utils/DataflowAssertStreaming.java (92%) rename runners/spark/src/test/java/{com/cloudera/dataflow => org/apache/beam/runners}/spark/streaming/utils/EmbeddedKafkaCluster.java (99%) diff --git a/runners/spark/pom.xml b/runners/spark/pom.xml index 399e9e77ad34a..a0601611daee8 100644 --- a/runners/spark/pom.xml +++ b/runners/spark/pom.xml @@ -11,21 +11,136 @@ the specific language governing permissions and limitations under the License. --> + 4.0.0 - Dataflow on Spark - com.cloudera.dataflow.spark - spark-dataflow + + + org.apache.beam + runners + 1.5.0-SNAPSHOT + + + spark-runner 0.4.3-SNAPSHOT + + Spark Beam Runner jar + 2014 + + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + + + UTF-8 UTF-8 1.7 1.5.2 - 1.3.0 + 1.5.0-SNAPSHOT + + + apache.snapshots + Apache Development Snapshot Repository + https://repository.apache.org/content/repositories/snapshots/ + + false + + + true + + + + + + + org.apache.spark + spark-core_2.10 + ${spark.version} + provided + + + org.apache.spark + spark-streaming_2.10 + ${spark.version} + provided + + + org.apache.spark + spark-streaming-kafka_2.10 + ${spark.version} + provided + + + org.apache.kafka + kafka_2.10 + 0.8.2.1 + provided + + + com.google.guava + guava + ${guava.version} + + + com.google.cloud.dataflow + google-cloud-dataflow-java-sdk-all + ${beam.version} + + + + org.slf4j + slf4j-jdk14 + + + + + com.google.cloud.dataflow + google-cloud-dataflow-java-examples-all + ${beam.version} + + + + org.slf4j + slf4j-jdk14 + + + + + org.apache.avro + avro-mapred + 1.7.7 + hadoop2 + + + + org.mortbay.jetty + servlet-api + + + + + + + junit + junit + 4.12 + test + + + org.hamcrest + hamcrest-all + 1.3 + test + + + @@ -231,20 +346,20 @@ License. shade - - - - - com.google.common - com.cloudera.dataflow.spark.relocated.com.google.common - - - true - spark-app - - - - + + + + + + + + + + + + + + @@ -274,89 +389,6 @@ License. 
- - - org.apache.spark - spark-core_2.10 - ${spark.version} - provided - - - org.apache.spark - spark-streaming_2.10 - ${spark.version} - provided - - - org.apache.spark - spark-streaming-kafka_2.10 - ${spark.version} - provided - - - org.apache.kafka - kafka_2.10 - 0.8.2.1 - provided - - - com.google.guava - guava - 18.0 - - - com.google.cloud.dataflow - google-cloud-dataflow-java-sdk-all - ${google-cloud-dataflow-version} - - - - org.slf4j - slf4j-jdk14 - - - - - com.google.cloud.dataflow - google-cloud-dataflow-java-examples-all - ${google-cloud-dataflow-version} - - - - org.slf4j - slf4j-jdk14 - - - - - org.apache.avro - avro-mapred - 1.7.7 - hadoop2 - - - - org.mortbay.jetty - servlet-api - - - - - - - junit - junit - 4.12 - test - - - org.hamcrest - hamcrest-all - 1.3 - test - - - @@ -380,25 +412,12 @@ License. - http://github.com/cloudera/spark-dataflow - 2014 - - - The Apache Software License, Version 2.0 - http://www.apache.org/licenses/LICENSE-2.0.txt - repo - - Cloudera, Inc. - - GitHub - https://github.com/cloudera/spark-dataflow/issues - scm:git:https://github.com/cloudera/spark-dataflow.git scm:git:https://github.com/cloudera/spark-dataflow.git @@ -410,31 +429,6 @@ License. 3.2.1 - - - cloudera.repo - https://repository.cloudera.com/artifactory/cloudera-repos - Cloudera Repositories - - true - - - true - - - - - - - cloudera.repo - https://repository.cloudera.com/artifactory/libs-release-local - - - cloudera.snapshots.repo - https://repository.cloudera.com/artifactory/libs-snapshot-local - - - release-sign-artifacts diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/DoFnFunction.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/DoFnFunction.java similarity index 96% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/DoFnFunction.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/DoFnFunction.java index 2bcfec3dfc43e..6af829fcdeb47 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/DoFnFunction.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/DoFnFunction.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import java.util.Iterator; import java.util.LinkedList; @@ -23,6 +23,7 @@ import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.values.TupleTag; +import org.apache.beam.runners.spark.util.BroadcastHelper; import org.apache.spark.api.java.function.FlatMapFunction; /** diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/EvaluationContext.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/EvaluationContext.java similarity index 99% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/EvaluationContext.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/EvaluationContext.java index a6ac6c2f3e867..836987f0e92ee 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/EvaluationContext.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/EvaluationContext.java @@ -13,7 +13,7 @@ * License. 
*/ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import static com.google.common.base.Preconditions.checkArgument; @@ -38,6 +38,7 @@ import com.google.cloud.dataflow.sdk.values.PValue; import com.google.common.base.Function; import com.google.common.collect.Iterables; +import org.apache.beam.runners.spark.coders.CoderHelpers; import org.apache.spark.api.java.JavaRDDLike; import org.apache.spark.api.java.JavaSparkContext; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/EvaluationResult.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/EvaluationResult.java similarity index 98% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/EvaluationResult.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/EvaluationResult.java index aad029aec17d0..4de97f62b07e2 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/EvaluationResult.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/EvaluationResult.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.cloud.dataflow.sdk.PipelineResult; import com.google.cloud.dataflow.sdk.values.PCollection; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/MultiDoFnFunction.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/MultiDoFnFunction.java similarity index 97% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/MultiDoFnFunction.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/MultiDoFnFunction.java index d269788bfda31..968825b0b1c9a 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/MultiDoFnFunction.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/MultiDoFnFunction.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import java.util.Iterator; import java.util.Map; @@ -25,6 +25,7 @@ import com.google.common.collect.Iterators; import com.google.common.collect.LinkedListMultimap; import com.google.common.collect.Multimap; +import org.apache.beam.runners.spark.util.BroadcastHelper; import org.apache.spark.api.java.function.PairFlatMapFunction; import org.joda.time.Instant; import scala.Tuple2; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkContextFactory.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkContextFactory.java similarity index 98% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkContextFactory.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/SparkContextFactory.java index d3e8c9b1cabf3..10b73698aaf03 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkContextFactory.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkContextFactory.java @@ -13,7 +13,7 @@ * License. 
*/ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineEvaluator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineEvaluator.java similarity index 98% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineEvaluator.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineEvaluator.java index 67621801cf4c1..913e5a12030d3 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineEvaluator.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineEvaluator.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.cloud.dataflow.sdk.runners.TransformTreeNode; import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineOptions.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineOptions.java similarity index 97% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineOptions.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineOptions.java index e96162ec3d034..1a5093b96838c 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineOptions.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineOptions.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.cloud.dataflow.sdk.options.ApplicationNameOptions; import com.google.cloud.dataflow.sdk.options.Default; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineOptionsFactory.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineOptionsFactory.java similarity index 95% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineOptionsFactory.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineOptionsFactory.java index 89cd0302b5220..7b44ee4ebaa3c 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineOptionsFactory.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineOptionsFactory.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineOptionsRegistrar.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineOptionsRegistrar.java similarity index 96% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineOptionsRegistrar.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineOptionsRegistrar.java index 21fe6932990f2..9f7f8c1644db8 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineOptionsRegistrar.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineOptionsRegistrar.java @@ -13,7 +13,7 @@ * License. 
*/ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.options.PipelineOptionsRegistrar; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineRunner.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineRunner.java similarity index 96% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineRunner.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineRunner.java index a9c2d860b8fe0..429750d5def8e 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineRunner.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineRunner.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.options.PipelineOptions; @@ -25,6 +25,11 @@ import com.google.cloud.dataflow.sdk.values.POutput; import com.google.cloud.dataflow.sdk.values.PValue; +import org.apache.beam.runners.spark.streaming.SparkStreamingPipelineOptions; +import org.apache.beam.runners.spark.streaming.StreamingEvaluationContext; +import org.apache.beam.runners.spark.streaming.StreamingTransformTranslator; +import org.apache.beam.runners.spark.streaming.StreamingWindowPipelineDetector; + import org.apache.spark.SparkException; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.streaming.Duration; @@ -32,11 +37,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.cloudera.dataflow.spark.streaming.SparkStreamingPipelineOptions; -import com.cloudera.dataflow.spark.streaming.StreamingEvaluationContext; -import com.cloudera.dataflow.spark.streaming.StreamingTransformTranslator; -import com.cloudera.dataflow.spark.streaming.StreamingWindowPipelineDetector; - /** * The SparkPipelineRunner translate operations defined on a pipeline to a representation * executable by Spark, and then submitting the job to Spark to be executed. If we wanted to run diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineRunnerRegistrar.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineRunnerRegistrar.java similarity index 96% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineRunnerRegistrar.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineRunnerRegistrar.java index 5bdd3222bfbdc..9a843702493dd 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineRunnerRegistrar.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineRunnerRegistrar.java @@ -13,7 +13,7 @@ * License. 
*/ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.cloud.dataflow.sdk.runners.PipelineRunner; import com.google.cloud.dataflow.sdk.runners.PipelineRunnerRegistrar; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineTranslator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineTranslator.java similarity index 95% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineTranslator.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineTranslator.java index d90363f43c740..e45491ac1bf01 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkPipelineTranslator.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkPipelineTranslator.java @@ -12,7 +12,7 @@ * the specific language governing permissions and limitations under the * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.cloud.dataflow.sdk.transforms.PTransform; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkProcessContext.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkProcessContext.java similarity index 96% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkProcessContext.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/SparkProcessContext.java index 73cec25e24897..c63415233904f 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkProcessContext.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkProcessContext.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import java.io.IOException; import java.util.Collection; @@ -36,6 +36,7 @@ import com.google.common.collect.AbstractIterator; import com.google.common.collect.Iterables; +import org.apache.beam.runners.spark.util.BroadcastHelper; import org.joda.time.Instant; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -174,6 +175,12 @@ public void writePCollectionViewData(TupleTag tag, throw new UnsupportedOperationException( "WindowingInternals#writePCollectionViewData() is not yet supported."); } + + @Override + public T sideInput(PCollectionView view, BoundedWindow mainInputWindow) { + throw new UnsupportedOperationException( + "WindowingInternals#sideInput() is not yet supported."); + } }; } diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkRuntimeContext.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRuntimeContext.java similarity index 97% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkRuntimeContext.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRuntimeContext.java index ec590a96fa18d..da48ad7bcdf6f 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/SparkRuntimeContext.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkRuntimeContext.java @@ -13,7 +13,7 @@ * License. 
*/ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import java.io.IOException; import java.io.Serializable; @@ -36,11 +36,13 @@ import com.google.cloud.dataflow.sdk.transforms.Sum; import com.google.cloud.dataflow.sdk.values.TypeDescriptor; import com.google.common.collect.ImmutableList; + +import org.apache.beam.runners.spark.aggregators.AggAccumParam; +import org.apache.beam.runners.spark.aggregators.NamedAggregators; + import org.apache.spark.Accumulator; import org.apache.spark.api.java.JavaSparkContext; -import com.cloudera.dataflow.spark.aggregators.AggAccumParam; -import com.cloudera.dataflow.spark.aggregators.NamedAggregators; /** * The SparkRuntimeContext allows us to define useful features on the client side before our diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/TransformEvaluator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/TransformEvaluator.java similarity index 95% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/TransformEvaluator.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/TransformEvaluator.java index 52842d575edeb..8aaceeb904ebf 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/TransformEvaluator.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/TransformEvaluator.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import java.io.Serializable; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/TransformTranslator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/TransformTranslator.java similarity index 97% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/TransformTranslator.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/TransformTranslator.java index 58b1924d555c3..e64f89a4c0428 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/TransformTranslator.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/TransformTranslator.java @@ -13,7 +13,12 @@ * License. 
*/ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; + +import static org.apache.beam.runners.spark.io.hadoop.ShardNameBuilder.getOutputDirectory; +import static org.apache.beam.runners.spark.io.hadoop.ShardNameBuilder.getOutputFilePrefix; +import static org.apache.beam.runners.spark.io.hadoop.ShardNameBuilder.getOutputFileTemplate; +import static org.apache.beam.runners.spark.io.hadoop.ShardNameBuilder.replaceShardCount; import java.io.IOException; import java.lang.reflect.Field; @@ -53,6 +58,13 @@ import org.apache.avro.mapred.AvroKey; import org.apache.avro.mapreduce.AvroJob; import org.apache.avro.mapreduce.AvroKeyInputFormat; +import org.apache.beam.runners.spark.coders.CoderHelpers; +import org.apache.beam.runners.spark.io.hadoop.HadoopIO; +import org.apache.beam.runners.spark.io.hadoop.ShardNameTemplateHelper; +import org.apache.beam.runners.spark.io.hadoop.TemplatedAvroKeyOutputFormat; +import org.apache.beam.runners.spark.io.hadoop.TemplatedTextOutputFormat; +import org.apache.beam.runners.spark.util.BroadcastHelper; +import org.apache.beam.runners.spark.util.ByteArray; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; @@ -67,13 +79,6 @@ import org.apache.spark.api.java.function.PairFunction; import scala.Tuple2; -import static com.cloudera.dataflow.spark.ShardNameBuilder.getOutputDirectory; -import static com.cloudera.dataflow.spark.ShardNameBuilder.getOutputFilePrefix; -import static com.cloudera.dataflow.spark.ShardNameBuilder.getOutputFileTemplate; -import static com.cloudera.dataflow.spark.ShardNameBuilder.replaceShardCount; - -import com.cloudera.dataflow.hadoop.HadoopIO; - /** * Supports translation between a DataFlow transform, and Spark's operations on RDDs. */ diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/WindowingHelpers.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/WindowingHelpers.java similarity index 97% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/WindowingHelpers.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/WindowingHelpers.java index 90600b261f280..6b904f7d1527c 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/WindowingHelpers.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/WindowingHelpers.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.cloud.dataflow.sdk.util.WindowedValue; import org.apache.spark.api.java.function.Function; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/aggregators/AggAccumParam.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/AggAccumParam.java similarity index 95% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/aggregators/AggAccumParam.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/AggAccumParam.java index a3055a256ded0..a82dbbec2092a 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/aggregators/AggAccumParam.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/AggAccumParam.java @@ -13,7 +13,7 @@ * License. 
*/ -package com.cloudera.dataflow.spark.aggregators; +package org.apache.beam.runners.spark.aggregators; import org.apache.spark.AccumulatorParam; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/aggregators/NamedAggregators.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/NamedAggregators.java similarity index 98% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/aggregators/NamedAggregators.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/NamedAggregators.java index d51e404491044..2747703ece8e6 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/aggregators/NamedAggregators.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/aggregators/NamedAggregators.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark.aggregators; +package org.apache.beam.runners.spark.aggregators; import java.io.IOException; import java.io.ObjectInputStream; @@ -27,7 +27,7 @@ import com.google.cloud.dataflow.sdk.transforms.Combine; import com.google.common.collect.ImmutableList; -import com.cloudera.dataflow.spark.SparkRuntimeContext; +import org.apache.beam.runners.spark.SparkRuntimeContext; /** * This class wraps a map of named aggregators. Spark expects that all accumulators be declared diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/CoderHelpers.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/coders/CoderHelpers.java similarity index 89% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/CoderHelpers.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/coders/CoderHelpers.java index 0ae06c1a9e491..7d75e7dc3fe16 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/CoderHelpers.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/coders/CoderHelpers.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark.coders; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -23,6 +23,7 @@ import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.common.collect.Iterables; +import org.apache.beam.runners.spark.util.ByteArray; import org.apache.spark.api.java.function.Function; import org.apache.spark.api.java.function.PairFunction; import scala.Tuple2; @@ -42,7 +43,7 @@ private CoderHelpers() { * @param type of value that is serialized * @return Byte array representing serialized object. */ - static byte[] toByteArray(T value, Coder coder) { + public static byte[] toByteArray(T value, Coder coder) { ByteArrayOutputStream baos = new ByteArrayOutputStream(); try { coder.encode(value, baos, new Coder.Context(true)); @@ -60,7 +61,7 @@ static byte[] toByteArray(T value, Coder coder) { * @param type of value that is serialized * @return List of bytes representing serialized objects. */ - static List toByteArrays(Iterable values, Coder coder) { + public static List toByteArrays(Iterable values, Coder coder) { List res = new LinkedList<>(); for (T value : values) { res.add(toByteArray(value, coder)); @@ -76,7 +77,7 @@ static List toByteArrays(Iterable values, Coder coder) { * @param Type of object to be returned. * @return Deserialized object. 
*/ - static T fromByteArray(byte[] serialized, Coder coder) { + public static T fromByteArray(byte[] serialized, Coder coder) { ByteArrayInputStream bais = new ByteArrayInputStream(serialized); try { return coder.decode(bais, new Coder.Context(true)); @@ -92,7 +93,7 @@ static T fromByteArray(byte[] serialized, Coder coder) { * @param The type of the object being serialized. * @return A function that accepts an object and returns its coder-serialized form. */ - static Function toByteFunction(final Coder coder) { + public static Function toByteFunction(final Coder coder) { return new Function() { @Override public byte[] call(T t) throws Exception { @@ -108,7 +109,7 @@ public byte[] call(T t) throws Exception { * @param The type of the object being deserialized. * @return A function that accepts a byte array and returns its corresponding object. */ - static Function fromByteFunction(final Coder coder) { + public static Function fromByteFunction(final Coder coder) { return new Function() { @Override public T call(byte[] bytes) throws Exception { @@ -126,7 +127,7 @@ public T call(byte[] bytes) throws Exception { * @param The type of the value being serialized. * @return A function that accepts a key-value pair and returns a pair of byte arrays. */ - static PairFunction, ByteArray, byte[]> toByteFunction( + public static PairFunction, ByteArray, byte[]> toByteFunction( final Coder keyCoder, final Coder valueCoder) { return new PairFunction, ByteArray, byte[]>() { @Override @@ -146,7 +147,7 @@ public Tuple2 call(Tuple2 kv) { * @param The type of the value being deserialized. * @return A function that accepts a pair of byte arrays and returns a key-value pair. */ - static PairFunction, K, V> fromByteFunction( + public static PairFunction, K, V> fromByteFunction( final Coder keyCoder, final Coder valueCoder) { return new PairFunction, K, V>() { @Override @@ -167,7 +168,7 @@ public Tuple2 call(Tuple2 tuple) { * @param The type of the value being deserialized. * @return A function that accepts a pair of byte arrays and returns a key-value pair. */ - static PairFunction>, K, Iterable> + public static PairFunction>, K, Iterable> fromByteFunctionIterable(final Coder keyCoder, final Coder valueCoder) { return new PairFunction>, K, Iterable>() { @Override diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/hadoop/NullWritableCoder.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/coders/NullWritableCoder.java similarity index 97% rename from runners/spark/src/main/java/com/cloudera/dataflow/hadoop/NullWritableCoder.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/coders/NullWritableCoder.java index 5e5d3919ac53d..5b77e975302cb 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/hadoop/NullWritableCoder.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/coders/NullWritableCoder.java @@ -13,7 +13,7 @@ * License. 
*/ -package com.cloudera.dataflow.hadoop; +package org.apache.beam.runners.spark.coders; import java.io.InputStream; import java.io.OutputStream; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/hadoop/WritableCoder.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/coders/WritableCoder.java similarity index 98% rename from runners/spark/src/main/java/com/cloudera/dataflow/hadoop/WritableCoder.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/coders/WritableCoder.java index 324b203a2f152..fa73753cf22f1 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/hadoop/WritableCoder.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/coders/WritableCoder.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.hadoop; +package org.apache.beam.runners.spark.coders; import java.io.DataInputStream; import java.io.DataOutputStream; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/io/ConsoleIO.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/ConsoleIO.java similarity index 97% rename from runners/spark/src/main/java/com/cloudera/dataflow/io/ConsoleIO.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/io/ConsoleIO.java index bc19b39068a50..2ee072a0cfce4 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/io/ConsoleIO.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/ConsoleIO.java @@ -12,7 +12,7 @@ * the specific language governing permissions and limitations under the * License. */ -package com.cloudera.dataflow.io; +package org.apache.beam.runners.spark.io; import com.google.cloud.dataflow.sdk.transforms.PTransform; import com.google.cloud.dataflow.sdk.values.PCollection; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/io/CreateStream.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/CreateStream.java similarity index 98% rename from runners/spark/src/main/java/com/cloudera/dataflow/io/CreateStream.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/io/CreateStream.java index 9a9927873222b..c92f8bfa940e4 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/io/CreateStream.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/CreateStream.java @@ -12,7 +12,7 @@ * the specific language governing permissions and limitations under the * License. */ -package com.cloudera.dataflow.io; +package org.apache.beam.runners.spark.io; import com.google.cloud.dataflow.sdk.transforms.PTransform; import com.google.cloud.dataflow.sdk.util.WindowingStrategy; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/io/KafkaIO.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/KafkaIO.java similarity index 99% rename from runners/spark/src/main/java/com/cloudera/dataflow/io/KafkaIO.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/io/KafkaIO.java index 154e6dacf37ef..979815745bcf4 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/io/KafkaIO.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/KafkaIO.java @@ -12,7 +12,7 @@ * the specific language governing permissions and limitations under the * License. 
*/ -package com.cloudera.dataflow.io; +package org.apache.beam.runners.spark.io; import java.util.Map; import java.util.Set; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/hadoop/HadoopIO.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/HadoopIO.java similarity index 98% rename from runners/spark/src/main/java/com/cloudera/dataflow/hadoop/HadoopIO.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/HadoopIO.java index c79f2113cc510..e8d2aa1bdc86b 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/hadoop/HadoopIO.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/HadoopIO.java @@ -12,7 +12,7 @@ * the specific language governing permissions and limitations under the * License. */ -package com.cloudera.dataflow.hadoop; +package org.apache.beam.runners.spark.io.hadoop; import java.util.HashMap; import java.util.Map; @@ -28,8 +28,6 @@ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import com.cloudera.dataflow.spark.ShardNameTemplateAware; - public final class HadoopIO { private HadoopIO() { diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/ShardNameBuilder.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/ShardNameBuilder.java similarity index 97% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/ShardNameBuilder.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/ShardNameBuilder.java index f53b6d906e55c..21c798515319c 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/ShardNameBuilder.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/ShardNameBuilder.java @@ -13,14 +13,14 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark.io.hadoop; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.hadoop.fs.Path; -final class ShardNameBuilder { +public final class ShardNameBuilder { private ShardNameBuilder() { } diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/ShardNameTemplateAware.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/ShardNameTemplateAware.java similarity index 90% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/ShardNameTemplateAware.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/ShardNameTemplateAware.java index bb9a7a558e851..fdee42b53b35e 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/ShardNameTemplateAware.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/ShardNameTemplateAware.java @@ -13,13 +13,13 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark.io.hadoop; /** * A marker interface that implementations of * {@link org.apache.hadoop.mapreduce.lib.output.FileOutputFormat} implement to indicate * that they produce shard names that adhere to the template in - * {@link com.cloudera.dataflow.hadoop.HadoopIO.Write}. + * {@link HadoopIO.Write}. * * Some common shard names are defined in * {@link com.google.cloud.dataflow.sdk.io.ShardNameTemplate}. 
diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/ShardNameTemplateHelper.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/ShardNameTemplateHelper.java similarity index 93% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/ShardNameTemplateHelper.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/ShardNameTemplateHelper.java index 56980a1e0e565..fd6f5dae14db8 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/ShardNameTemplateHelper.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/ShardNameTemplateHelper.java @@ -13,10 +13,12 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark.io.hadoop; import java.io.IOException; +import static org.apache.beam.runners.spark.io.hadoop.ShardNameBuilder.replaceShardNumber; + import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskID; @@ -25,8 +27,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static com.cloudera.dataflow.spark.ShardNameBuilder.replaceShardNumber; - public final class ShardNameTemplateHelper { private static final Logger LOG = LoggerFactory.getLogger(ShardNameTemplateHelper.class); diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/TemplatedAvroKeyOutputFormat.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/TemplatedAvroKeyOutputFormat.java similarity index 96% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/TemplatedAvroKeyOutputFormat.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/TemplatedAvroKeyOutputFormat.java index ef24137b9f62a..4feaff6f63ff6 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/TemplatedAvroKeyOutputFormat.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/TemplatedAvroKeyOutputFormat.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark.io.hadoop; import java.io.IOException; import java.io.OutputStream; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/TemplatedSequenceFileOutputFormat.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/TemplatedSequenceFileOutputFormat.java similarity index 96% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/TemplatedSequenceFileOutputFormat.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/TemplatedSequenceFileOutputFormat.java index 3ab07b5a95b19..922b906d7bf08 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/TemplatedSequenceFileOutputFormat.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/TemplatedSequenceFileOutputFormat.java @@ -13,7 +13,7 @@ * License. 
*/ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark.io.hadoop; import java.io.IOException; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/TemplatedTextOutputFormat.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/TemplatedTextOutputFormat.java similarity index 96% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/TemplatedTextOutputFormat.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/TemplatedTextOutputFormat.java index a8e218d40bf32..1e53dce89b6c6 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/TemplatedTextOutputFormat.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/io/hadoop/TemplatedTextOutputFormat.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark.io.hadoop; import java.io.IOException; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/streaming/SparkStreamingPipelineOptions.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/streaming/SparkStreamingPipelineOptions.java similarity index 91% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/streaming/SparkStreamingPipelineOptions.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/streaming/SparkStreamingPipelineOptions.java index 57253f09b3ec1..17edba3aa655d 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/streaming/SparkStreamingPipelineOptions.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/streaming/SparkStreamingPipelineOptions.java @@ -12,12 +12,12 @@ * the specific language governing permissions and limitations under the * License. */ -package com.cloudera.dataflow.spark.streaming; +package org.apache.beam.runners.spark.streaming; import com.google.cloud.dataflow.sdk.options.Default; import com.google.cloud.dataflow.sdk.options.Description; -import com.cloudera.dataflow.spark.SparkPipelineOptions; +import org.apache.beam.runners.spark.SparkPipelineOptions; /** * Options used to configure Spark streaming. diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/streaming/SparkStreamingPipelineOptionsFactory.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/streaming/SparkStreamingPipelineOptionsFactory.java similarity index 94% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/streaming/SparkStreamingPipelineOptionsFactory.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/streaming/SparkStreamingPipelineOptionsFactory.java index 3b568af48b7ef..822feb4e5654d 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/streaming/SparkStreamingPipelineOptionsFactory.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/streaming/SparkStreamingPipelineOptionsFactory.java @@ -12,7 +12,7 @@ * the specific language governing permissions and limitations under the * License. 
*/ -package com.cloudera.dataflow.spark.streaming; +package org.apache.beam.runners.spark.streaming; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/streaming/SparkStreamingPipelineOptionsRegistrar.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/streaming/SparkStreamingPipelineOptionsRegistrar.java similarity index 95% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/streaming/SparkStreamingPipelineOptionsRegistrar.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/streaming/SparkStreamingPipelineOptionsRegistrar.java index 01c43751121a2..2c5414ddf67f0 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/streaming/SparkStreamingPipelineOptionsRegistrar.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/streaming/SparkStreamingPipelineOptionsRegistrar.java @@ -12,7 +12,7 @@ * the specific language governing permissions and limitations under the * License. */ -package com.cloudera.dataflow.spark.streaming; +package org.apache.beam.runners.spark.streaming; import com.google.cloud.dataflow.sdk.options.PipelineOptions; import com.google.cloud.dataflow.sdk.options.PipelineOptionsRegistrar; diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/streaming/StreamingEvaluationContext.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/streaming/StreamingEvaluationContext.java similarity index 97% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/streaming/StreamingEvaluationContext.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/streaming/StreamingEvaluationContext.java index 5ecd562584146..9d1d7868cfcc3 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/streaming/StreamingEvaluationContext.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/streaming/StreamingEvaluationContext.java @@ -12,7 +12,7 @@ * the specific language governing permissions and limitations under the * License. */ -package com.cloudera.dataflow.spark.streaming; +package org.apache.beam.runners.spark.streaming; import java.util.LinkedHashMap; @@ -31,6 +31,9 @@ import com.google.cloud.dataflow.sdk.values.POutput; import com.google.cloud.dataflow.sdk.values.PValue; +import org.apache.beam.runners.spark.EvaluationContext; +import org.apache.beam.runners.spark.SparkRuntimeContext; + import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDDLike; import org.apache.spark.api.java.JavaSparkContext; @@ -40,9 +43,6 @@ import org.apache.spark.streaming.api.java.JavaStreamingContext; -import com.cloudera.dataflow.spark.EvaluationContext; -import com.cloudera.dataflow.spark.SparkRuntimeContext; - /** * Streaming evaluation context helps to handle streaming. 
*/ diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/streaming/StreamingTransformTranslator.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/streaming/StreamingTransformTranslator.java similarity index 96% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/streaming/StreamingTransformTranslator.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/streaming/StreamingTransformTranslator.java index d8ae5e899dfa5..c78c7fa372b42 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/streaming/StreamingTransformTranslator.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/streaming/StreamingTransformTranslator.java @@ -12,7 +12,7 @@ * the specific language governing permissions and limitations under the * License. */ -package com.cloudera.dataflow.spark.streaming; +package org.apache.beam.runners.spark.streaming; import java.lang.reflect.ParameterizedType; import java.lang.reflect.Type; @@ -28,7 +28,6 @@ import com.google.cloud.dataflow.sdk.coders.VoidCoder; import com.google.cloud.dataflow.sdk.io.AvroIO; import com.google.cloud.dataflow.sdk.io.TextIO; -import com.google.cloud.dataflow.sdk.repackaged.com.google.common.reflect.TypeToken; import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform; import com.google.cloud.dataflow.sdk.transforms.Create; import com.google.cloud.dataflow.sdk.transforms.DoFn; @@ -45,8 +44,20 @@ import com.google.cloud.dataflow.sdk.values.PCollectionList; import com.google.cloud.dataflow.sdk.values.PDone; +import com.google.common.reflect.TypeToken; import kafka.serializer.Decoder; +import org.apache.beam.runners.spark.DoFnFunction; +import org.apache.beam.runners.spark.EvaluationContext; +import org.apache.beam.runners.spark.SparkPipelineTranslator; +import org.apache.beam.runners.spark.TransformEvaluator; +import org.apache.beam.runners.spark.TransformTranslator; +import org.apache.beam.runners.spark.WindowingHelpers; +import org.apache.beam.runners.spark.io.ConsoleIO; +import org.apache.beam.runners.spark.io.CreateStream; +import org.apache.beam.runners.spark.io.KafkaIO; +import org.apache.beam.runners.spark.io.hadoop.HadoopIO; + import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.function.Function; import org.apache.spark.streaming.Duration; @@ -59,16 +70,6 @@ import scala.Tuple2; -import com.cloudera.dataflow.hadoop.HadoopIO; -import com.cloudera.dataflow.io.ConsoleIO; -import com.cloudera.dataflow.io.CreateStream; -import com.cloudera.dataflow.io.KafkaIO; -import com.cloudera.dataflow.spark.DoFnFunction; -import com.cloudera.dataflow.spark.EvaluationContext; -import com.cloudera.dataflow.spark.SparkPipelineTranslator; -import com.cloudera.dataflow.spark.TransformEvaluator; -import com.cloudera.dataflow.spark.TransformTranslator; -import com.cloudera.dataflow.spark.WindowingHelpers; /** * Supports translation between a DataFlow transform, and Spark's operations on DStreams. 
diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/streaming/StreamingWindowPipelineDetector.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/streaming/StreamingWindowPipelineDetector.java similarity index 94% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/streaming/StreamingWindowPipelineDetector.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/streaming/StreamingWindowPipelineDetector.java index 406dfcc78924c..684401143b125 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/streaming/StreamingWindowPipelineDetector.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/streaming/StreamingWindowPipelineDetector.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark.streaming; +package org.apache.beam.runners.spark.streaming; import com.google.cloud.dataflow.sdk.runners.TransformTreeNode; import com.google.cloud.dataflow.sdk.transforms.PTransform; @@ -25,12 +25,13 @@ import com.google.cloud.dataflow.sdk.values.PInput; import com.google.cloud.dataflow.sdk.values.POutput; +import org.apache.beam.runners.spark.SparkPipelineRunner; +import org.apache.beam.runners.spark.SparkPipelineTranslator; +import org.apache.beam.runners.spark.TransformTranslator; + import org.apache.spark.streaming.Duration; import org.apache.spark.streaming.Durations; -import com.cloudera.dataflow.spark.SparkPipelineRunner; -import com.cloudera.dataflow.spark.SparkPipelineTranslator; -import com.cloudera.dataflow.spark.TransformTranslator; /** * Pipeline {@link SparkPipelineRunner.Evaluator} to detect windowing. diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/BroadcastHelper.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/util/BroadcastHelper.java similarity index 92% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/BroadcastHelper.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/util/BroadcastHelper.java index 8dca939c52478..af831c6f1596e 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/BroadcastHelper.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/util/BroadcastHelper.java @@ -13,19 +13,20 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark.util; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.Serializable; import com.google.cloud.dataflow.sdk.coders.Coder; +import org.apache.beam.runners.spark.coders.CoderHelpers; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.broadcast.Broadcast; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -abstract class BroadcastHelper implements Serializable { +public abstract class BroadcastHelper implements Serializable { /** * If the property {@code dataflow.spark.directBroadcast} is set to @@ -49,7 +50,7 @@ public static BroadcastHelper create(T value, Coder coder) { public abstract void broadcast(JavaSparkContext jsc); /** - * A {@link com.cloudera.dataflow.spark.BroadcastHelper} that relies on the underlying + * A {@link BroadcastHelper} that relies on the underlying * Spark serialization (Kryo) to broadcast values. This is appropriate when * broadcasting very large values, since no copy of the object is made. 
* @param @@ -77,7 +78,7 @@ public void broadcast(JavaSparkContext jsc) { } /** - * A {@link com.cloudera.dataflow.spark.BroadcastHelper} that uses a + * A {@link BroadcastHelper} that uses a * {@link Coder} to encode values as byte arrays * before broadcasting. * @param diff --git a/runners/spark/src/main/java/com/cloudera/dataflow/spark/ByteArray.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/util/ByteArray.java similarity index 89% rename from runners/spark/src/main/java/com/cloudera/dataflow/spark/ByteArray.java rename to runners/spark/src/main/java/org/apache/beam/runners/spark/util/ByteArray.java index 06db57206d431..7679b9c51c570 100644 --- a/runners/spark/src/main/java/com/cloudera/dataflow/spark/ByteArray.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/util/ByteArray.java @@ -12,18 +12,18 @@ * the specific language governing permissions and limitations under the * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark.util; import java.io.Serializable; import java.util.Arrays; import com.google.common.primitives.UnsignedBytes; -class ByteArray implements Serializable, Comparable { +public class ByteArray implements Serializable, Comparable { private final byte[] value; - ByteArray(byte[] value) { + public ByteArray(byte[] value) { this.value = value; } diff --git a/runners/spark/src/main/resources/META-INF/services/com.google.cloud.dataflow.sdk.options.PipelineOptionsRegistrar b/runners/spark/src/main/resources/META-INF/services/com.google.cloud.dataflow.sdk.options.PipelineOptionsRegistrar index 5733a86585235..98387a635ac15 100644 --- a/runners/spark/src/main/resources/META-INF/services/com.google.cloud.dataflow.sdk.options.PipelineOptionsRegistrar +++ b/runners/spark/src/main/resources/META-INF/services/com.google.cloud.dataflow.sdk.options.PipelineOptionsRegistrar @@ -13,5 +13,5 @@ # See the License for the specific language governing permissions and # limitations under the License. # -com.cloudera.dataflow.spark.SparkPipelineOptionsRegistrar -com.cloudera.dataflow.spark.streaming.SparkStreamingPipelineOptionsRegistrar \ No newline at end of file +org.apache.beam.runners.spark.SparkPipelineOptionsRegistrar +org.apache.beam.runners.spark.streaming.SparkStreamingPipelineOptionsRegistrar \ No newline at end of file diff --git a/runners/spark/src/main/resources/META-INF/services/com.google.cloud.dataflow.sdk.runners.PipelineRunnerRegistrar b/runners/spark/src/main/resources/META-INF/services/com.google.cloud.dataflow.sdk.runners.PipelineRunnerRegistrar index 26e0b3a19809b..972b1a305ce41 100644 --- a/runners/spark/src/main/resources/META-INF/services/com.google.cloud.dataflow.sdk.runners.PipelineRunnerRegistrar +++ b/runners/spark/src/main/resources/META-INF/services/com.google.cloud.dataflow.sdk.runners.PipelineRunnerRegistrar @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -com.cloudera.dataflow.spark.SparkPipelineRunnerRegistrar \ No newline at end of file +org.apache.beam.runners.spark.SparkPipelineRunnerRegistrar \ No newline at end of file diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/CombineGloballyTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/CombineGloballyTest.java similarity index 98% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/CombineGloballyTest.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/CombineGloballyTest.java index 667e949d53924..e36babee2a6d2 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/CombineGloballyTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/CombineGloballyTest.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; @@ -21,9 +21,10 @@ import com.google.cloud.dataflow.sdk.transforms.Create; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.common.collect.Iterables; +import org.junit.Test; + import java.util.Arrays; import java.util.List; -import org.junit.Test; import static org.junit.Assert.assertEquals; diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/CombinePerKeyTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/CombinePerKeyTest.java similarity index 89% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/CombinePerKeyTest.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/CombinePerKeyTest.java index f9d5b46f40c05..aaa0dac7a889a 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/CombinePerKeyTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/CombinePerKeyTest.java @@ -13,18 +13,14 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.coders.VarLongCoder; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; -import com.google.cloud.dataflow.sdk.transforms.Create; -import com.google.cloud.dataflow.sdk.transforms.DoFn; -import com.google.cloud.dataflow.sdk.transforms.PTransform; -import com.google.cloud.dataflow.sdk.transforms.ParDo; -import com.google.cloud.dataflow.sdk.transforms.Sum; +import com.google.cloud.dataflow.sdk.transforms.*; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.common.collect.ImmutableList; diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/DeDupTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/DeDupTest.java similarity index 98% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/DeDupTest.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/DeDupTest.java index 7495aeb3e2d3c..3af0a57384406 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/DeDupTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/DeDupTest.java @@ -13,7 +13,7 @@ * License. 
*/ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; @@ -22,10 +22,11 @@ import com.google.cloud.dataflow.sdk.transforms.RemoveDuplicates; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.common.collect.ImmutableSet; +import org.junit.Test; + import java.util.Arrays; import java.util.List; import java.util.Set; -import org.junit.Test; /** * A test based on {@code DeDupExample} from the SDK. diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/DoFnOutputTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/DoFnOutputTest.java similarity index 98% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/DoFnOutputTest.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/DoFnOutputTest.java index 2b0947f115ed5..2aa1e6a2c7f25 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/DoFnOutputTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/DoFnOutputTest.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.testing.DataflowAssert; @@ -21,9 +21,10 @@ import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.ParDo; import com.google.cloud.dataflow.sdk.values.PCollection; -import java.io.Serializable; import org.junit.Test; +import java.io.Serializable; + public class DoFnOutputTest implements Serializable { @Test public void test() throws Exception { diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/EmptyInputTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/EmptyInputTest.java similarity index 98% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/EmptyInputTest.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/EmptyInputTest.java index 6c89ca105c74b..cd7bc113affa6 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/EmptyInputTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/EmptyInputTest.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; @@ -22,9 +22,10 @@ import com.google.cloud.dataflow.sdk.transforms.SerializableFunction; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.common.collect.Iterables; +import org.junit.Test; + import java.util.Collections; import java.util.List; -import org.junit.Test; import static org.junit.Assert.assertEquals; diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/MultiOutputWordCountTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/MultiOutputWordCountTest.java similarity index 83% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/MultiOutputWordCountTest.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/MultiOutputWordCountTest.java index 2df8493e1701f..d090cf6dd43b8 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/MultiOutputWordCountTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/MultiOutputWordCountTest.java @@ -13,30 +13,14 @@ * License. 
*/ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.runners.AggregatorValues; -import com.google.cloud.dataflow.sdk.transforms.Aggregator; -import com.google.cloud.dataflow.sdk.transforms.ApproximateUnique; -import com.google.cloud.dataflow.sdk.transforms.Count; -import com.google.cloud.dataflow.sdk.transforms.Create; -import com.google.cloud.dataflow.sdk.transforms.DoFn; -import com.google.cloud.dataflow.sdk.transforms.Flatten; -import com.google.cloud.dataflow.sdk.transforms.Max; -import com.google.cloud.dataflow.sdk.transforms.PTransform; -import com.google.cloud.dataflow.sdk.transforms.ParDo; -import com.google.cloud.dataflow.sdk.transforms.Sum; -import com.google.cloud.dataflow.sdk.transforms.View; -import com.google.cloud.dataflow.sdk.values.KV; -import com.google.cloud.dataflow.sdk.values.PCollection; -import com.google.cloud.dataflow.sdk.values.PCollectionList; -import com.google.cloud.dataflow.sdk.values.PCollectionTuple; -import com.google.cloud.dataflow.sdk.values.PCollectionView; -import com.google.cloud.dataflow.sdk.values.TupleTag; -import com.google.cloud.dataflow.sdk.values.TupleTagList; +import com.google.cloud.dataflow.sdk.transforms.*; +import com.google.cloud.dataflow.sdk.values.*; import com.google.common.collect.Iterables; import org.junit.Assert; import org.junit.Test; diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/SerializationTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/SerializationTest.java similarity index 93% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/SerializationTest.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/SerializationTest.java index 21a839b59e6a3..35bbf91ddb633 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/SerializationTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/SerializationTest.java @@ -13,25 +13,21 @@ * License. 
*/ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.AtomicCoder; import com.google.cloud.dataflow.sdk.coders.Coder; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.testing.DataflowAssert; -import com.google.cloud.dataflow.sdk.transforms.Aggregator; -import com.google.cloud.dataflow.sdk.transforms.Count; -import com.google.cloud.dataflow.sdk.transforms.Create; -import com.google.cloud.dataflow.sdk.transforms.DoFn; -import com.google.cloud.dataflow.sdk.transforms.PTransform; -import com.google.cloud.dataflow.sdk.transforms.ParDo; -import com.google.cloud.dataflow.sdk.transforms.Sum; +import com.google.cloud.dataflow.sdk.transforms.*; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.common.base.Function; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; +import org.junit.Test; + import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -40,8 +36,6 @@ import java.util.Set; import java.util.regex.Pattern; -import org.junit.Test; - public class SerializationTest { public static class StringHolder { // not serializable diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/SideEffectsTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/SideEffectsTest.java similarity index 93% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/SideEffectsTest.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/SideEffectsTest.java index ce7acda5aa44c..44d8e0f97a543 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/SideEffectsTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/SideEffectsTest.java @@ -13,22 +13,21 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.StringDelegateCoder; import com.google.cloud.dataflow.sdk.transforms.Create; import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.ParDo; -import java.io.Serializable; -import java.net.URI; import org.junit.After; import org.junit.Before; import org.junit.Test; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import java.io.Serializable; +import java.net.URI; + +import static org.junit.Assert.*; public class SideEffectsTest implements Serializable { diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/SimpleWordCountTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/SimpleWordCountTest.java similarity index 89% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/SimpleWordCountTest.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/SimpleWordCountTest.java index 1c2f7a9e79a7f..f930855fef524 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/SimpleWordCountTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/SimpleWordCountTest.java @@ -13,28 +13,22 @@ * License. 
*/ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.testing.DataflowAssert; -import com.google.cloud.dataflow.sdk.transforms.Aggregator; -import com.google.cloud.dataflow.sdk.transforms.Count; -import com.google.cloud.dataflow.sdk.transforms.Create; -import com.google.cloud.dataflow.sdk.transforms.DoFn; -import com.google.cloud.dataflow.sdk.transforms.PTransform; -import com.google.cloud.dataflow.sdk.transforms.ParDo; -import com.google.cloud.dataflow.sdk.transforms.Sum; +import com.google.cloud.dataflow.sdk.transforms.*; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.common.collect.ImmutableSet; +import org.junit.Test; + import java.util.Arrays; import java.util.List; import java.util.Set; import java.util.regex.Pattern; -import org.junit.Test; - public class SimpleWordCountTest { private static final String[] WORDS_ARRAY = { "hi there", "hi", "hi sue bob", diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/TestSparkPipelineOptionsFactory.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/TestSparkPipelineOptionsFactory.java similarity index 96% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/TestSparkPipelineOptionsFactory.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/TestSparkPipelineOptionsFactory.java index 50cd0de69558c..3fc3eccaebff7 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/TestSparkPipelineOptionsFactory.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/TestSparkPipelineOptionsFactory.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import org.junit.Assert; import org.junit.Test; diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/TfIdfTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/TfIdfTest.java similarity index 98% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/TfIdfTest.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/TfIdfTest.java index 35ab26e598af7..0a36c9e275be8 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/TfIdfTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/TfIdfTest.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.cloud.dataflow.examples.complete.TfIdf; import com.google.cloud.dataflow.sdk.Pipeline; @@ -25,9 +25,10 @@ import com.google.cloud.dataflow.sdk.transforms.RemoveDuplicates; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; +import org.junit.Test; + import java.net.URI; import java.util.Arrays; -import org.junit.Test; /** * A test based on {@code TfIdf} from the SDK. 
diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/TransformTranslatorTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/TransformTranslatorTest.java similarity index 98% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/TransformTranslatorTest.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/TransformTranslatorTest.java index 73b36438e5383..f759fe94fa7be 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/TransformTranslatorTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/TransformTranslatorTest.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.api.client.repackaged.com.google.common.base.Joiner; import com.google.cloud.dataflow.sdk.Pipeline; @@ -23,7 +23,6 @@ import com.google.cloud.dataflow.sdk.runners.PipelineRunner; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.common.base.Charsets; -import java.util.Collections; import org.apache.commons.io.FileUtils; import org.junit.Assert; import org.junit.Before; @@ -35,6 +34,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Paths; +import java.util.Collections; import java.util.List; /** diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/WindowedWordCountTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/WindowedWordCountTest.java similarity index 98% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/WindowedWordCountTest.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/WindowedWordCountTest.java index c16878eef32b5..eb88542b7e9d9 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/WindowedWordCountTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/WindowedWordCountTest.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; @@ -26,6 +26,7 @@ import com.google.common.collect.ImmutableList; import java.util.Arrays; import java.util.List; + import org.joda.time.Duration; import org.junit.Test; diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/hadoop/WritableCoderTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/coders/WritableCoderTest.java similarity index 96% rename from runners/spark/src/test/java/com/cloudera/dataflow/hadoop/WritableCoderTest.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/coders/WritableCoderTest.java index 29a73b65b4f04..ad7256c299f70 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/hadoop/WritableCoderTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/coders/WritableCoderTest.java @@ -13,7 +13,7 @@ * License. 
*/ -package com.cloudera.dataflow.hadoop; +package org.apache.beam.runners.spark.coders; import com.google.cloud.dataflow.sdk.testing.CoderProperties; import org.apache.hadoop.io.IntWritable; diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/AvroPipelineTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/io/AvroPipelineTest.java similarity index 96% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/AvroPipelineTest.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/io/AvroPipelineTest.java index ea4cc38e39ee9..73dd2d3ff7ac0 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/AvroPipelineTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/io/AvroPipelineTest.java @@ -13,7 +13,7 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark.io; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.io.AvroIO; @@ -32,6 +32,8 @@ import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.generic.GenericRecord; +import org.apache.beam.runners.spark.EvaluationResult; +import org.apache.beam.runners.spark.SparkPipelineRunner; import org.junit.Before; import org.junit.Rule; import org.junit.Test; diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/NumShardsTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/io/NumShardsTest.java similarity index 91% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/NumShardsTest.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/io/NumShardsTest.java index 6849389d84e7c..39525b2d699ba 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/NumShardsTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/io/NumShardsTest.java @@ -13,7 +13,7 @@ * License. 
*/ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark.io; import com.google.cloud.dataflow.examples.WordCount; import com.google.cloud.dataflow.sdk.Pipeline; @@ -21,21 +21,25 @@ import com.google.cloud.dataflow.sdk.io.TextIO; import com.google.cloud.dataflow.sdk.transforms.Create; import com.google.cloud.dataflow.sdk.transforms.MapElements; -import com.google.cloud.dataflow.sdk.transforms.ParDo; import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.common.base.Charsets; import com.google.common.collect.Sets; import com.google.common.io.Files; +import org.apache.beam.runners.spark.EvaluationResult; +import org.apache.beam.runners.spark.SparkPipelineOptions; +import org.apache.beam.runners.spark.SparkPipelineOptionsFactory; +import org.apache.beam.runners.spark.SparkPipelineRunner; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + import java.io.File; import java.io.FileFilter; import java.io.IOException; import java.util.Arrays; import java.util.List; import java.util.Set; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/HadoopFileFormatPipelineTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/io/hadoop/HadoopFileFormatPipelineTest.java similarity index 95% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/HadoopFileFormatPipelineTest.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/io/hadoop/HadoopFileFormatPipelineTest.java index 579ada554baa9..7a9be8bab43fb 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/HadoopFileFormatPipelineTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/io/hadoop/HadoopFileFormatPipelineTest.java @@ -13,15 +13,14 @@ * License. 
*/ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark.io.hadoop; -import com.cloudera.dataflow.hadoop.HadoopIO; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; -import java.io.File; -import java.io.IOException; +import org.apache.beam.runners.spark.EvaluationResult; +import org.apache.beam.runners.spark.SparkPipelineRunner; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; @@ -37,6 +36,9 @@ import org.junit.Test; import org.junit.rules.TemporaryFolder; +import java.io.File; +import java.io.IOException; + import static org.junit.Assert.assertEquals; public class HadoopFileFormatPipelineTest { diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/ShardNameBuilderTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/io/hadoop/ShardNameBuilderTest.java similarity index 86% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/ShardNameBuilderTest.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/io/hadoop/ShardNameBuilderTest.java index 341c214235620..b1d35d56a56ee 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/ShardNameBuilderTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/io/hadoop/ShardNameBuilderTest.java @@ -13,15 +13,15 @@ * License. */ -package com.cloudera.dataflow.spark; +package org.apache.beam.runners.spark.io.hadoop; import org.junit.Test; -import static com.cloudera.dataflow.spark.ShardNameBuilder.getOutputDirectory; -import static com.cloudera.dataflow.spark.ShardNameBuilder.getOutputFilePrefix; -import static com.cloudera.dataflow.spark.ShardNameBuilder.getOutputFileTemplate; -import static com.cloudera.dataflow.spark.ShardNameBuilder.replaceShardCount; -import static com.cloudera.dataflow.spark.ShardNameBuilder.replaceShardNumber; +import static org.apache.beam.runners.spark.io.hadoop.ShardNameBuilder.getOutputDirectory; +import static org.apache.beam.runners.spark.io.hadoop.ShardNameBuilder.getOutputFilePrefix; +import static org.apache.beam.runners.spark.io.hadoop.ShardNameBuilder.getOutputFileTemplate; +import static org.apache.beam.runners.spark.io.hadoop.ShardNameBuilder.replaceShardCount; +import static org.apache.beam.runners.spark.io.hadoop.ShardNameBuilder.replaceShardNumber; import static org.junit.Assert.assertEquals; public class ShardNameBuilderTest { diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/streaming/FlattenStreamingTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/streaming/FlattenStreamingTest.java similarity index 91% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/streaming/FlattenStreamingTest.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/streaming/FlattenStreamingTest.java index 087283467f537..828b26e011592 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/streaming/FlattenStreamingTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/streaming/FlattenStreamingTest.java @@ -12,7 +12,7 @@ * the specific language governing permissions and limitations under the * License. 
*/ -package com.cloudera.dataflow.spark.streaming; +package org.apache.beam.runners.spark.streaming; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; @@ -24,10 +24,10 @@ import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.cloud.dataflow.sdk.values.PCollectionList; -import com.cloudera.dataflow.io.CreateStream; -import com.cloudera.dataflow.spark.EvaluationResult; -import com.cloudera.dataflow.spark.SparkPipelineRunner; -import com.cloudera.dataflow.spark.streaming.utils.DataflowAssertStreaming; +import org.apache.beam.runners.spark.io.CreateStream; +import org.apache.beam.runners.spark.EvaluationResult; +import org.apache.beam.runners.spark.SparkPipelineRunner; +import org.apache.beam.runners.spark.streaming.utils.DataflowAssertStreaming; import org.joda.time.Duration; import org.junit.Test; diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/streaming/KafkaStreamingTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/streaming/KafkaStreamingTest.java similarity index 88% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/streaming/KafkaStreamingTest.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/streaming/KafkaStreamingTest.java index f68aea88aa435..e9e685b37ffbd 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/streaming/KafkaStreamingTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/streaming/KafkaStreamingTest.java @@ -12,11 +12,9 @@ * the specific language governing permissions and limitations under the * License. */ -package com.cloudera.dataflow.spark.streaming; +package org.apache.beam.runners.spark.streaming; import com.google.cloud.dataflow.sdk.Pipeline; -import com.google.cloud.dataflow.sdk.repackaged.com.google.common.collect.ImmutableMap; -import com.google.cloud.dataflow.sdk.repackaged.com.google.common.collect.ImmutableSet; import com.google.cloud.dataflow.sdk.testing.DataflowAssert; import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.ParDo; @@ -26,11 +24,13 @@ import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; -import com.cloudera.dataflow.io.KafkaIO; -import com.cloudera.dataflow.spark.EvaluationResult; -import com.cloudera.dataflow.spark.SparkPipelineRunner; -import com.cloudera.dataflow.spark.streaming.utils.DataflowAssertStreaming; -import com.cloudera.dataflow.spark.streaming.utils.EmbeddedKafkaCluster; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import org.apache.beam.runners.spark.io.KafkaIO; +import org.apache.beam.runners.spark.EvaluationResult; +import org.apache.beam.runners.spark.SparkPipelineRunner; +import org.apache.beam.runners.spark.streaming.utils.DataflowAssertStreaming; +import org.apache.beam.runners.spark.streaming.utils.EmbeddedKafkaCluster; import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.ProducerRecord; @@ -60,10 +60,10 @@ public class KafkaStreamingTest { new Properties(), Collections.singletonList(6667)); private static final String TOPIC = "kafka_dataflow_test_topic"; private static final Map KAFKA_MESSAGES = ImmutableMap.of( - "k1", "v1", "k2", "v2", "k3", "v3", "k4", "v4" + "k1", "v1", "k2", "v2", "k3", "v3", "k4", "v4" ); private static final Set EXPECTED = ImmutableSet.of( - "k1,v1", "k2,v2", "k3,v3", "k4,v4" + "k1,v1", "k2,v2", "k3,v3", 
"k4,v4" ); private static final long TEST_TIMEOUT_MSEC = 1000L; diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/streaming/SimpleStreamingWordCountTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/streaming/SimpleStreamingWordCountTest.java similarity index 88% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/streaming/SimpleStreamingWordCountTest.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/streaming/SimpleStreamingWordCountTest.java index e22e616264fca..9a0609d4e54be 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/streaming/SimpleStreamingWordCountTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/streaming/SimpleStreamingWordCountTest.java @@ -12,7 +12,7 @@ * the specific language governing permissions and limitations under the * License. */ -package com.cloudera.dataflow.spark.streaming; +package org.apache.beam.runners.spark.streaming; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; @@ -23,11 +23,11 @@ import com.google.cloud.dataflow.sdk.values.PCollection; import com.google.common.collect.ImmutableSet; -import com.cloudera.dataflow.io.CreateStream; -import com.cloudera.dataflow.spark.EvaluationResult; -import com.cloudera.dataflow.spark.SimpleWordCountTest; -import com.cloudera.dataflow.spark.SparkPipelineRunner; -import com.cloudera.dataflow.spark.streaming.utils.DataflowAssertStreaming; +import org.apache.beam.runners.spark.io.CreateStream; +import org.apache.beam.runners.spark.EvaluationResult; +import org.apache.beam.runners.spark.SimpleWordCountTest; +import org.apache.beam.runners.spark.SparkPipelineRunner; +import org.apache.beam.runners.spark.streaming.utils.DataflowAssertStreaming; import org.joda.time.Duration; import org.junit.Test; diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/streaming/utils/DataflowAssertStreaming.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/streaming/utils/DataflowAssertStreaming.java similarity index 92% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/streaming/utils/DataflowAssertStreaming.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/streaming/utils/DataflowAssertStreaming.java index c0c5976e27143..19759d7be6f24 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/streaming/utils/DataflowAssertStreaming.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/streaming/utils/DataflowAssertStreaming.java @@ -12,9 +12,9 @@ * the specific language governing permissions and limitations under the * License. 
*/ -package com.cloudera.dataflow.spark.streaming.utils; +package org.apache.beam.runners.spark.streaming.utils; -import com.cloudera.dataflow.spark.EvaluationResult; +import org.apache.beam.runners.spark.EvaluationResult; import org.junit.Assert; diff --git a/runners/spark/src/test/java/com/cloudera/dataflow/spark/streaming/utils/EmbeddedKafkaCluster.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/streaming/utils/EmbeddedKafkaCluster.java similarity index 99% rename from runners/spark/src/test/java/com/cloudera/dataflow/spark/streaming/utils/EmbeddedKafkaCluster.java rename to runners/spark/src/test/java/org/apache/beam/runners/spark/streaming/utils/EmbeddedKafkaCluster.java index e75d7295c2125..333453aa98dd9 100644 --- a/runners/spark/src/test/java/com/cloudera/dataflow/spark/streaming/utils/EmbeddedKafkaCluster.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/streaming/utils/EmbeddedKafkaCluster.java @@ -12,7 +12,7 @@ * the specific language governing permissions and limitations under the * License. */ -package com.cloudera.dataflow.spark.streaming.utils; +package org.apache.beam.runners.spark.streaming.utils; import org.apache.zookeeper.server.NIOServerCnxnFactory; import org.apache.zookeeper.server.ServerCnxnFactory; From 65355dd3747c3673d3707e8afb63babaaf228cf2 Mon Sep 17 00:00:00 2001 From: Sela Date: Sat, 12 Mar 2016 17:26:34 +0200 Subject: [PATCH 02/10] [BEAM-11] set coder for pipeline input --- .../spark/io/hadoop/HadoopFileFormatPipelineTest.java | 5 ++++- .../runners/spark/streaming/KafkaStreamingTest.java | 11 +++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/runners/spark/src/test/java/org/apache/beam/runners/spark/io/hadoop/HadoopFileFormatPipelineTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/io/hadoop/HadoopFileFormatPipelineTest.java index 7a9be8bab43fb..abe1119b70cf0 100644 --- a/runners/spark/src/test/java/org/apache/beam/runners/spark/io/hadoop/HadoopFileFormatPipelineTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/io/hadoop/HadoopFileFormatPipelineTest.java @@ -16,11 +16,13 @@ package org.apache.beam.runners.spark.io.hadoop; import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.KvCoder; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.values.KV; import com.google.cloud.dataflow.sdk.values.PCollection; import org.apache.beam.runners.spark.EvaluationResult; import org.apache.beam.runners.spark.SparkPipelineRunner; +import org.apache.beam.runners.spark.coders.WritableCoder; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; @@ -66,7 +68,8 @@ public void testSequenceFile() throws Exception { (Class>) (Class) SequenceFileInputFormat.class; HadoopIO.Read.Bound read = HadoopIO.Read.from(inputFile.getAbsolutePath(), inputFormatClass, IntWritable.class, Text.class); - PCollection> input = p.apply(read); + PCollection> input = p.apply(read) + .setCoder(KvCoder.of(WritableCoder.of(IntWritable.class), WritableCoder.of(Text.class))); @SuppressWarnings("unchecked") Class> outputFormatClass = (Class>) (Class) TemplatedSequenceFileOutputFormat.class; diff --git a/runners/spark/src/test/java/org/apache/beam/runners/spark/streaming/KafkaStreamingTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/streaming/KafkaStreamingTest.java index e9e685b37ffbd..ff1e11cbdddeb 100644 --- 
a/runners/spark/src/test/java/org/apache/beam/runners/spark/streaming/KafkaStreamingTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/streaming/KafkaStreamingTest.java @@ -15,6 +15,8 @@ package org.apache.beam.runners.spark.streaming; import com.google.cloud.dataflow.sdk.Pipeline; +import com.google.cloud.dataflow.sdk.coders.KvCoder; +import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.testing.DataflowAssert; import com.google.cloud.dataflow.sdk.transforms.DoFn; import com.google.cloud.dataflow.sdk.transforms.ParDo; @@ -79,7 +81,7 @@ public static void init() throws IOException { producerProps.put("bootstrap.servers", EMBEDDED_KAFKA_CLUSTER.getBrokerList()); Serializer stringSerializer = new StringSerializer(); try (@SuppressWarnings("unchecked") KafkaProducer kafkaProducer = - new KafkaProducer(producerProps, stringSerializer, stringSerializer)) { + new KafkaProducer(producerProps, stringSerializer, stringSerializer)) { for (Map.Entry en : KAFKA_MESSAGES.entrySet()) { kafkaProducer.send(new ProducerRecord<>(TOPIC, en.getKey(), en.getValue())); } @@ -96,13 +98,14 @@ public void testRun() throws Exception { Pipeline p = Pipeline.create(options); Map kafkaParams = ImmutableMap.of( - "metadata.broker.list", EMBEDDED_KAFKA_CLUSTER.getBrokerList(), - "auto.offset.reset", "smallest" + "metadata.broker.list", EMBEDDED_KAFKA_CLUSTER.getBrokerList(), + "auto.offset.reset", "smallest" ); PCollection> kafkaInput = p.apply(KafkaIO.Read.from(StringDecoder.class, StringDecoder.class, String.class, String.class, Collections.singleton(TOPIC), - kafkaParams)); + kafkaParams)) + .setCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())); PCollection> windowedWords = kafkaInput .apply(Window.>into(FixedWindows.of(Duration.standardSeconds(1)))); From b34886e8962d12c57430042c9b4edce690273e32 Mon Sep 17 00:00:00 2001 From: Sela Date: Sat, 12 Mar 2016 17:27:38 +0200 Subject: [PATCH 03/10] [BEAM-11] extractOutput() should not return null --- .../java/org/apache/beam/runners/spark/CombineGloballyTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runners/spark/src/test/java/org/apache/beam/runners/spark/CombineGloballyTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/CombineGloballyTest.java index e36babee2a6d2..49e68f55e2e95 100644 --- a/runners/spark/src/test/java/org/apache/beam/runners/spark/CombineGloballyTest.java +++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/CombineGloballyTest.java @@ -73,7 +73,7 @@ public StringBuilder mergeAccumulators(Iterable accumulators) { @Override public String extractOutput(StringBuilder accumulator) { - return accumulator.toString(); + return accumulator != null ? accumulator.toString(): ""; } private static StringBuilder combine(StringBuilder accum, String datum) { From 6cfa32bd9195f55a294c15a28765ef5b90095032 Mon Sep 17 00:00:00 2001 From: Sela Date: Sat, 12 Mar 2016 17:28:15 +0200 Subject: [PATCH 04/10] [BEAM-11] This is a placeholder to get the TfIdfTest working. 
Should be replaced by a SparkStateInternals implementation --- .../apache/beam/runners/spark/SparkProcessContext.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkProcessContext.java b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkProcessContext.java index c63415233904f..f4d2ca0c474fc 100644 --- a/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkProcessContext.java +++ b/runners/spark/src/main/java/org/apache/beam/runners/spark/SparkProcessContext.java @@ -30,7 +30,7 @@ import com.google.cloud.dataflow.sdk.util.TimerInternals; import com.google.cloud.dataflow.sdk.util.WindowedValue; import com.google.cloud.dataflow.sdk.util.WindowingInternals; -import com.google.cloud.dataflow.sdk.util.state.StateInternals; +import com.google.cloud.dataflow.sdk.util.state.*; import com.google.cloud.dataflow.sdk.values.PCollectionView; import com.google.cloud.dataflow.sdk.values.TupleTag; import com.google.common.collect.AbstractIterator; @@ -154,8 +154,10 @@ public void outputWindowedValue(O output, Instant timestamp, Collection Date: Sat, 12 Mar 2016 17:54:02 +0200 Subject: [PATCH 05/10] [BEAM-11] Add Spark Beam runner module --- runners/pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/runners/pom.xml b/runners/pom.xml index 757e2081d1cce..eb77517b9d6e1 100644 --- a/runners/pom.xml +++ b/runners/pom.xml @@ -38,6 +38,7 @@ flink + spark From fca5a093a2d6a1d9fa55554c917a1b350877f936 Mon Sep 17 00:00:00 2001 From: Sela Date: Mon, 14 Mar 2016 18:50:37 +0200 Subject: [PATCH 06/10] [BEAM-11] relocate Guava used by Dataflow (v19) since it conflicts with version used by Hadoop (v11) --- runners/spark/pom.xml | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/runners/spark/pom.xml b/runners/spark/pom.xml index a0601611daee8..8d2704f56cd06 100644 --- a/runners/spark/pom.xml +++ b/runners/spark/pom.xml @@ -346,20 +346,20 @@ License. 
shade - - - - - - - - - - - - - - + + + + + com.google.common + org.apache.beam.spark.relocated.com.google.common + + + true + spark-app + + + + From 543c82bd90030a52503c614231c5183d8ed4cf39 Mon Sep 17 00:00:00 2001 From: Sela Date: Mon, 14 Mar 2016 19:09:14 +0200 Subject: [PATCH 07/10] [BEAM-11] Replaced license headers to ASF license --- runners/spark/build-resources/checkstyle.xml | 27 ++++++++++--------- runners/spark/build-resources/header-file.txt | 23 +++++++++------- .../beam/runners/spark/DoFnFunction.java | 23 +++++++++------- .../beam/runners/spark/EvaluationContext.java | 23 +++++++++------- .../beam/runners/spark/EvaluationResult.java | 23 +++++++++------- .../beam/runners/spark/MultiDoFnFunction.java | 23 +++++++++------- .../runners/spark/SparkContextFactory.java | 23 +++++++++------- .../runners/spark/SparkPipelineEvaluator.java | 23 +++++++++------- .../runners/spark/SparkPipelineOptions.java | 23 +++++++++------- .../spark/SparkPipelineOptionsFactory.java | 23 +++++++++------- .../spark/SparkPipelineOptionsRegistrar.java | 23 +++++++++------- .../runners/spark/SparkPipelineRunner.java | 23 +++++++++------- .../spark/SparkPipelineRunnerRegistrar.java | 23 +++++++++------- .../spark/SparkPipelineTranslator.java | 23 +++++++++------- .../runners/spark/SparkProcessContext.java | 23 +++++++++------- .../runners/spark/SparkRuntimeContext.java | 23 +++++++++------- .../runners/spark/TransformEvaluator.java | 23 +++++++++------- .../runners/spark/TransformTranslator.java | 23 +++++++++------- .../beam/runners/spark/WindowingHelpers.java | 23 +++++++++------- .../spark/aggregators/AggAccumParam.java | 23 +++++++++------- .../spark/aggregators/NamedAggregators.java | 23 +++++++++------- .../runners/spark/coders/CoderHelpers.java | 23 +++++++++------- .../spark/coders/NullWritableCoder.java | 23 +++++++++------- .../runners/spark/coders/WritableCoder.java | 23 +++++++++------- .../beam/runners/spark/io/ConsoleIO.java | 23 +++++++++------- .../beam/runners/spark/io/CreateStream.java | 23 +++++++++------- .../apache/beam/runners/spark/io/KafkaIO.java | 23 +++++++++------- .../runners/spark/io/hadoop/HadoopIO.java | 23 +++++++++------- .../spark/io/hadoop/ShardNameBuilder.java | 23 +++++++++------- .../io/hadoop/ShardNameTemplateAware.java | 23 +++++++++------- .../io/hadoop/ShardNameTemplateHelper.java | 23 +++++++++------- .../hadoop/TemplatedAvroKeyOutputFormat.java | 23 +++++++++------- .../TemplatedSequenceFileOutputFormat.java | 23 +++++++++------- .../io/hadoop/TemplatedTextOutputFormat.java | 23 +++++++++------- .../SparkStreamingPipelineOptions.java | 23 +++++++++------- .../SparkStreamingPipelineOptionsFactory.java | 23 +++++++++------- ...parkStreamingPipelineOptionsRegistrar.java | 23 +++++++++------- .../streaming/StreamingEvaluationContext.java | 23 +++++++++------- .../StreamingTransformTranslator.java | 23 +++++++++------- .../StreamingWindowPipelineDetector.java | 23 +++++++++------- .../runners/spark/util/BroadcastHelper.java | 23 +++++++++------- .../beam/runners/spark/util/ByteArray.java | 23 +++++++++------- .../runners/spark/CombineGloballyTest.java | 23 +++++++++------- .../beam/runners/spark/CombinePerKeyTest.java | 23 +++++++++------- .../apache/beam/runners/spark/DeDupTest.java | 23 +++++++++------- .../beam/runners/spark/DoFnOutputTest.java | 23 +++++++++------- .../beam/runners/spark/EmptyInputTest.java | 23 +++++++++------- .../spark/MultiOutputWordCountTest.java | 23 +++++++++------- .../beam/runners/spark/SerializationTest.java | 23 +++++++++------- 
.../beam/runners/spark/SideEffectsTest.java | 23 +++++++++------- .../runners/spark/SimpleWordCountTest.java | 23 +++++++++------- .../TestSparkPipelineOptionsFactory.java | 23 +++++++++------- .../apache/beam/runners/spark/TfIdfTest.java | 23 +++++++++------- .../spark/TransformTranslatorTest.java | 23 +++++++++------- .../runners/spark/WindowedWordCountTest.java | 23 +++++++++------- .../spark/coders/WritableCoderTest.java | 23 +++++++++------- .../runners/spark/io/AvroPipelineTest.java | 23 +++++++++------- .../beam/runners/spark/io/NumShardsTest.java | 23 +++++++++------- .../hadoop/HadoopFileFormatPipelineTest.java | 23 +++++++++------- .../spark/io/hadoop/ShardNameBuilderTest.java | 23 +++++++++------- .../spark/streaming/FlattenStreamingTest.java | 23 +++++++++------- .../spark/streaming/KafkaStreamingTest.java | 23 +++++++++------- .../SimpleStreamingWordCountTest.java | 23 +++++++++------- .../utils/DataflowAssertStreaming.java | 23 +++++++++------- .../streaming/utils/EmbeddedKafkaCluster.java | 23 +++++++++------- 65 files changed, 847 insertions(+), 652 deletions(-) diff --git a/runners/spark/build-resources/checkstyle.xml b/runners/spark/build-resources/checkstyle.xml index c5b884d7a9dc6..033d8ae476ded 100644 --- a/runners/spark/build-resources/checkstyle.xml +++ b/runners/spark/build-resources/checkstyle.xml @@ -3,18 +3,21 @@ "-//Puppy Crawl//DTD Check Configuration 1.2//EN" "http://www.puppycrawl.com/dtds/configuration_1_2.dtd">